In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import os
from pathlib import Path
from scipy import stats
from statannot import add_stat_annotation

# Global matplotlib font settings applied to every figure in this notebook:
# Arial, normal weight, size 8.
font = dict(family='Arial', weight='normal', size=8)
matplotlib.rc('font', **font)

In [None]:
drug = 'Nife'

# Read every data file found under ./<drug> and tag each row with the
# concentration and unit parsed from the file name and with the folder the
# file was found in. Frames are collected in a list and concatenated once
# (concatenating inside the loop is quadratic in the number of files).
frames = []
for root, dirs, files in os.walk(Path('./'+drug)):
    # Skip hidden directories (e.g. .ipynb_checkpoints) and walk in a fixed
    # order so the resulting row order does not depend on the file system.
    dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
    folder = root
    for file in sorted(files):
        if file.startswith('.'):
            # hidden OS/editor files such as .DS_Store are not data files
            continue
        file_df = pd.read_csv(Path(root) / file)
        # some silly string manipulation to get the concentration
        # NOTE(review): assumes names of the form <conc><2-char unit>.<3-char ext>
        # with '_' used as the decimal separator (e.g. "0_3uM.csv") - confirm.
        unit = file[-6:-4]
        conc = str(file[0:-6].replace('_', '.'))
        file_df = file_df.dropna(axis=0, how='any')
        frames.append(file_df.assign(conc=conc, unit=unit, folder=folder))

if not frames:
    raise FileNotFoundError('no data files found under ' + str(Path('./'+drug)))
df = pd.concat(frames, ignore_index=True)

# drop rows whose average rise time is 0.2 or more
df = df[df['t_rise_avg'] < 0.2]
#only keep paired data (5 datapoints)
df = df.groupby(['labels', 'folder']).filter(lambda x: len(x)==5)
# The plotting cells pair the i-th row of one concentration with the i-th row
# of another, so give every concentration the same (folder, labels) row order.
# A stable sort keeps the relative order of rows that share both keys.
df = df.sort_values(['folder', 'labels'], kind='mergesort')

# compute triangulation (ratio method) and overpotential
df['tri_ratio'] = (df['APD90_avg']-df['APD50_avg'])/(df['APD50_avg']-df['APD10_avg'])
df['tri_90_50'] = df['APD90_avg']/df['APD50_avg']
df['start_min'] = (df['PP_Amp_avg']-df['SP_Amp_avg'])
df['overpotential'] = df['start_min']/df['SP_Amp_avg']
# boolean flag: APD50 is less than half of APD90
df['atrial'] = ((df['APD50_avg']/df['APD90_avg']) < 0.5)
df['period'] = 1/df['frequency']
# Bazett-style correction: divide each APD by the square root of the period
sqrt_period = np.sqrt(df['period'])
df['c_APD90_Bazett'] = df['APD90_avg']/sqrt_period
df['c_APD50_Bazett'] = df['APD50_avg']/sqrt_period
df['c_APD10_Bazett'] = df['APD10_avg']/sqrt_period
df['RL_CL'] = df['max_min_time']/df['period']

# number of retained rows per folder (shown as the cell output)
df['folder'].value_counts()

In [None]:
# Metrics plotted by this cell: data-frame column (datas), y-axis label
# (labels) and the unit suffix appended to that label (units). The
# commented-out lists below are the longer set of metrics kept for reference.
#datas = ['APD90_avg', 'APD50_avg', 'APD10_avg', 't_rise_avg', 'frequency',
#         'overpotential','triangle', 'RL_CL', 'period', 'c_APD90_Bazett', 'tri_90_50', 'tri_ratio']
#labels = ['APD90', 'APD50', 'APD10', 't-rise', 'frequency', 
#          'overpotential','triangulation', 'RL-to-CL', 'CL', 'cAPD90', 'triangulation_90to50', 'triangulation_old']
#units = ['/s','/s','/s','/s', '', '','/s', '', '/s', '', '', '']
# NOTE(review): 'triangle' is not one of the columns derived in the loading
# cell; presumably it comes straight from the input files - confirm.
datas = ['APD90_avg', 'c_APD90_Bazett', 'period', 'triangle']
labels = ['APD90', 'cAPD90', 'CL', 'triangulation']
units = ['/s', '', '/s', '/s']
assert len(datas) == len(labels) == len(units), 'datas, labels and units must have the same length'

# x-axis order of the concentrations and the pairs compared against "0"
if drug == 'Nife':
    order = ["0", "10", "30", "100", "300"]
    box_pairs = [("0", "10"), ("0", "30"), ("0", "100"), ("0", "300")] 
#                ,("10", "30"), ("30", "100"), ("100", "300"), 
#                ("10", "100"), ("10", "300"), ("30", "300")]
else:
    order = ["0", "0.3", "1", "3", "10"]
    box_pairs = [("0", "0.3"), ("0", "1"), ("0", "3"), ("0", "10")]
#                 ,("0.3", "1"), ("0.3", "3"), ("0.3", "10"),
#                 ("1", "3"), ("1", "10"), ("3", "10")]

# Fixed seed so the bootstrap confidence intervals printed below are the same
# on every run.
bootstrap_seed = 0

for y_data, ylabel, unit in zip(datas, labels, units):
    print(y_data)
    fig, ax = plt.subplots(figsize=(2,2), dpi = 600)
#    if ylabel != 'CL' and ylabel !='frequency':
    sns.stripplot(data = df, x = 'conc', y = y_data, order=order, color='black', size=3, zorder=1, ax=ax)
    sns.pointplot(data = df, x = 'conc', y = y_data, order=order, ci='sd', scale=0.3, errwidth=1, capsize=0.05, zorder=2, ax=ax)
    sns.despine()
    
    #plot connections
    # The strip plot is drawn first, so the first len(order) collections on
    # the axes hold the (jittered x, y) positions of each concentration group.
    locs = [coll.get_offsets() for coll in ax.collections[:len(order)]]
    group_sizes = [len(loc) for loc in locs]
    if len(set(group_sizes)) > 1:
        # Row k of every group is joined to row k of the next group, which is
        # only meaningful when all groups hold the same number of points.
        raise ValueError('cannot draw paired connections for ' + y_data +
                         ': unequal number of points per concentration ' + str(group_sizes))
    for point_idx in range(locs[0].shape[0]):
        for group_idx in range(len(order)-1):
            x = [locs[group_idx][point_idx, 0], locs[group_idx+1][point_idx, 0]]
            y = [locs[group_idx][point_idx, 1], locs[group_idx+1][point_idx, 1]]
            ax.plot(x, y, color="black", linewidth=0.5, alpha=0.1)
    
    ax.set_xlabel('Concentration/' +df['unit'].unique()[0])
    ax.set_ylabel(ylabel+unit)
    test = add_stat_annotation(ax, data=df, x='conc', y=y_data, order=order,
                               box_pairs=box_pairs, test='Wilcoxon', text_format='star', 
                               line_height=0.02, text_offset=0.01, line_offset=0.01, line_offset_to_box=0.02,
                               loc='outside', linewidth=1, verbose=0)
    # bootstrap (percentile) confidence interval of the mean per concentration
    for o in order:
        dat = df[df['conc'] == o][y_data].to_numpy()
        print(o)
        print(stats.bootstrap((dat,), np.mean, method="percentile", random_state=bootstrap_seed))
        #print(stats.shapiro(dat))
    # paired comparison of each concentration against the first element of the pair;
    # rows are paired by position within each concentration
    for p in box_pairs:
        bef = df.loc[df['conc'] == p[0], y_data].to_numpy()
        aft = df.loc[df['conc'] == p[1], y_data].to_numpy()
        diff = aft - bef
        print(p)
        print(stats.shapiro(diff))
        #print(stats.skewtest(diff))
        #print(stats.kurtosistest(diff))
        print(stats.wilcoxon(bef, aft))
        #print(stats.ttest_rel(bef, aft))
    # save the figure created in this iteration (not whichever is "current")
    fig.savefig(drug+'_'+ylabel+'_withsd.svg', format='svg', bbox_inches='tight')
#    plt.close()

In [None]:
# plot with connections
# One strip plot per metric; row k of each concentration group is joined to
# row k of the next group by a faint line, then paired t-test annotations are
# added and the figure is written to a PNG.
for y_data, ylabel, unit in zip(datas, labels, units):
    fig, ax = plt.subplots(figsize=(3,3), dpi = 300)
    sns.stripplot(data = df, x = 'conc', y = y_data, order=order, size=3, color='black', ax=ax)
    sns.despine()

    #plot connections
    # the first len(order) children of the axes are read as the point sets
    locs = [child.get_offsets() for child in ax.get_children()[:len(order)]]
    n_points = locs[0].shape[0]
    for row in range(n_points):
        for left, right in zip(locs[:-1], locs[1:]):
            xs = [left[row, 0], right[row, 0]]
            ys = [left[row, 1], right[row, 1]]
            ax.plot(xs, ys, color="black", linewidth=0.5, alpha=0.1)

    x_unit = df['unit'].unique()[0]
    ax.set_xlabel('Concentration/' + x_unit)
    ax.set_ylabel(ylabel + unit)
    test = add_stat_annotation(
        ax, data=df, x='conc', y=y_data, order=order,
        box_pairs=box_pairs, test='t-test_paired', text_format='star',
        line_height=0.02, text_offset=0.01, line_offset=0.01, line_offset_to_box=0.02,
        loc='outside', linewidth=1, verbose=0,
    )
    out_name = drug+'_'+ylabel+'_scatter.png'
    fig.savefig(out_name, bbox_inches='tight')

In [None]:
# plot with connections by folder
# Same figure as the plain connection plot, but points are coloured by the
# folder they were loaded from (the legend is removed again).
for metric_idx, y_data in enumerate(datas):
    ylabel = labels[metric_idx]
    unit = units[metric_idx]

    fig, ax = plt.subplots(figsize=(3,3), dpi = 300)
    sns.stripplot(data = df, x = 'conc', y = y_data, order=order, hue='folder', size=3, ax=ax)
    ax.get_legend().remove()
    sns.despine()

    #plot connections
    # NOTE(review): assumes the first len(order) children of the axes are the
    # per-concentration point collections even with hue set - confirm for the
    # installed seaborn version.
    children = ax.get_children()
    locs = []
    for group_idx in range(len(order)):
        locs.append(children[group_idx].get_offsets())
    for row in range(locs[0].shape[0]):
        for group_idx in range(len(order)-1):
            start = locs[group_idx][row]
            stop = locs[group_idx+1][row]
            ax.plot([start[0], stop[0]], [start[1], stop[1]],
                    color="black", linewidth=0.5, alpha=0.1)

    ax.set_xlabel('Concentration/' +df['unit'].unique()[0])
    ax.set_ylabel(ylabel+unit)
    annotation_style = dict(line_height=0.02, text_offset=0.01, line_offset=0.01,
                            line_offset_to_box=0.02, loc='outside', linewidth=1, verbose=0)
    test = add_stat_annotation(ax, data=df, x='conc', y=y_data, order=order,
                               box_pairs=box_pairs, test='t-test_paired', text_format='star',
                               **annotation_style)
    fig.savefig(drug+'_'+ylabel+'_scatter_byfolder.png', bbox_inches='tight')

In [None]:
# plot with connections, finalized
# Final figure set: three metrics, paired t-test annotations with no multiple
# comparisons correction, saved as SVG.
datas = ['APD90_avg', 'period', 'c_APD90_Bazett']
labels = ['APD90', 'CL', 'cAPD90']
units = ['/s', '/s', '']

# concentration order on the x axis; every non-zero level is compared to "0"
order = ["0", "10", "30", "100", "300"] if drug == 'Nife' else ["0", "0.3", "1", "3", "10"]
if drug == 'Nife':
    box_pairs = [("0", "10"), ("0", "30"), ("0", "100"), ("0", "300")]
else:
    box_pairs = [("0", "0.3"), ("0", "1"), ("0", "3"), ("0", "10")]

for y_data, ylabel, unit in zip(datas, labels, units):
    fig, ax = plt.subplots(figsize=(2,2), dpi = 600)
    sns.stripplot(data = df, x = 'conc', y = y_data, order=order, size=3, color='black', ax=ax)
    sns.despine()

    #plot connections
    # point positions of each concentration group, read from the first
    # len(order) children of the axes
    locs = [artist.get_offsets() for artist in ax.get_children()[:len(order)]]
    for row in range(locs[0].shape[0]):
        for k in range(1, len(order)):
            prev_pt = locs[k-1][row]
            next_pt = locs[k][row]
            ax.plot([prev_pt[0], next_pt[0]], [prev_pt[1], next_pt[1]],
                    color="black", linewidth=0.5, alpha=0.1)

    ax.set_xlabel('Concentration/' +df['unit'].unique()[0])
    ax.set_ylabel(ylabel+unit)
    test = add_stat_annotation(
        ax, data=df, x='conc', y=y_data, order=order, box_pairs=box_pairs,
        test='t-test_paired', comparisons_correction = None, text_format='star',
        line_height=0.02, text_offset=0.01, line_offset=0.01, line_offset_to_box=0.02,
        loc='outside', linewidth=1, verbose=0,
    )
    # pyplot saves the current figure, i.e. the one created in this iteration
    plt.savefig(drug+'_'+ylabel+'_scatter.svg', format='svg', bbox_inches='tight')