In [1]:
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon


In [2]:
df = pd.read_csv('../pipeline/ibs_data_all.csv')
df = df.drop('times', axis=1)
df.columns = df.columns.str[9:]
df.columns = ['times'] + list(df.columns[1:])
df

Unnamed: 0,times,add_f10000_maf0.2_cens0.1,add_f10000_maf0.2_cens0.4,add_f10000_maf0.2_cens0.8,add_f10000_maf0.4_cens0.1,add_f10000_maf0.4_cens0.4,add_f10000_maf0.4_cens0.8,add_f1000_maf0.2_cens0.1,add_f1000_maf0.2_cens0.4,add_f1000_maf0.2_cens0.8,...,me_f1000_maf0.2_cens0.8,me_f1000_maf0.4_cens0.1,me_f1000_maf0.4_cens0.4,me_f1000_maf0.4_cens0.8,me_f100_maf0.2_cens0.1,me_f100_maf0.2_cens0.4,me_f100_maf0.2_cens0.8,me_f100_maf0.4_cens0.1,me_f100_maf0.4_cens0.4,me_f100_maf0.4_cens0.8
0,0,0.002503,0.002503,0.074667,0.002503,0.007492,0.000000,0.005000,0.002509,0.147259,...,0.017590,0.155974,0.002503,0.002505,0.030469,0.248015,0.189073,0.053053,0.004999,0.000000
1,1,0.002503,0.002503,0.169705,0.002503,0.009972,0.000000,0.005000,0.002509,0.210523,...,0.066999,0.192057,0.001669,0.002514,0.052053,0.253306,0.210771,0.079594,0.004997,0.000000
2,2,0.002503,0.002503,0.256846,0.002503,0.005981,0.000000,0.005000,0.005017,0.246846,...,0.086718,0.213288,0.001669,0.002017,0.186911,0.252129,0.258593,0.140990,0.004996,0.000000
3,3,0.005006,0.005003,0.339077,0.002503,0.005965,0.000000,0.005000,0.007525,0.291005,...,0.115564,0.224310,0.001669,0.003039,0.198936,0.246592,0.296846,0.152659,0.004995,0.000000
4,4,0.005005,0.005003,0.395804,0.002503,0.009946,0.000000,0.005000,0.008781,0.306826,...,0.146164,0.223421,0.001669,0.003047,0.210577,0.238285,0.311036,0.168877,0.006657,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,93,,0.066380,,0.023403,0.102016,0.781100,0.023594,0.086751,,...,,,0.093237,0.525661,,,,0.010123,0.114648,0.000149
94,94,,0.096813,,0.023404,0.102016,0.781102,,0.000315,,...,,,,0.610539,,,,0.010123,0.114859,0.000142
95,95,,0.083458,,0.021839,0.102016,0.554650,,0.000289,,...,,,,0.500720,,,,,0.133068,0.000142
96,96,,0.111148,,0.017222,0.102016,0.554652,,0.000270,,...,,,,1.454048,,,,,0.198507,


In [3]:
def fun(col_name='me_f100_maf0.2_cens0.1'):
    temp_df = df[[col_name, 'times']]
    temp_df = temp_df.dropna()
    try:
        val = np.trapz(temp_df[col_name], temp_df['times']) / (list(temp_df['times'])[-1] - list(temp_df['times'])[0])
    except Exception as e:
#         print(col_name, e)
        val = np.nan
    return val

In [4]:
ibs_df = pd.DataFrame(columns=['Dataset', 'IBS'])
for col in df.columns:
    if 'ci' not in col:
        val = fun(col)
        ibs_df = pd.concat([pd.DataFrame([[col,val]], columns=ibs_df.columns), ibs_df], ignore_index=True)
ibs_df = ibs_df.drop(ibs_df.tail(1).index)
ibs_df = ibs_df.reindex(index=ibs_df.index[::-1]).reset_index(drop=True)
ibs_df


Unnamed: 0,Dataset,IBS
0,add_f10000_maf0.2_cens0.1,0.064559
1,add_f10000_maf0.2_cens0.4,0.167249
2,add_f10000_maf0.2_cens0.8,0.369870
3,add_f10000_maf0.4_cens0.1,0.092319
4,add_f10000_maf0.4_cens0.4,0.211989
...,...,...
67,me_f100_maf0.2_cens0.4,0.160364
68,me_f100_maf0.2_cens0.8,0.342781
69,me_f100_maf0.4_cens0.1,0.137622
70,me_f100_maf0.4_cens0.4,0.166774


In [5]:
models = ['add', 'het', 'epi', 'me']
nfeats = ['f100', 'f1000', 'f10000']
mafs = ['maf0.2', 'maf0.4']
censs = ['cens0.1', 'cens0.4', 'cens0.8']

In [6]:
ibs_df[['model', 'nfeat', 'maf', 'cens']] = pd.DataFrame(list(ibs_df['Dataset'].str.split('_')))

In [7]:
# ibs_df = ibs_df.iloc[:-1]

In [8]:
ibs_df[ibs_df['model']=='me']

Unnamed: 0,Dataset,IBS,model,nfeat,maf,cens
54,me_f10000_maf0.2_cens0.1,0.109922,me,f10000,maf0.2,cens0.1
55,me_f10000_maf0.2_cens0.4,0.186272,me,f10000,maf0.2,cens0.4
56,me_f10000_maf0.2_cens0.8,0.567774,me,f10000,maf0.2,cens0.8
57,me_f10000_maf0.4_cens0.1,0.108604,me,f10000,maf0.4,cens0.1
58,me_f10000_maf0.4_cens0.4,0.1266,me,f10000,maf0.4,cens0.4
59,me_f10000_maf0.4_cens0.8,0.69366,me,f10000,maf0.4,cens0.8
60,me_f1000_maf0.2_cens0.1,0.077318,me,f1000,maf0.2,cens0.1
61,me_f1000_maf0.2_cens0.4,0.279521,me,f1000,maf0.2,cens0.4
62,me_f1000_maf0.2_cens0.8,0.345707,me,f1000,maf0.2,cens0.8
63,me_f1000_maf0.4_cens0.1,0.123749,me,f1000,maf0.4,cens0.1


In [9]:
models_wx = pd.DataFrame(columns=['model 1', 'model 2',  'Stat', 'p-val'])
for i in models:
    for j in models:
        if i != j:
            x = ibs_df[ibs_df['model']==i]['IBS']
            y = ibs_df[ibs_df['model']==j]['IBS']
            print(len(x), len(y), i, j)
            res = wilcoxon(x, y)
            stat, pval = res.statistic, res.pvalue
        else:
            stat, pval = np.nan, np.nan
        models_wx = pd.concat([pd.DataFrame([[i, j, stat, pval]], columns=models_wx.columns), models_wx], ignore_index=True)
models_wx_ct = pd.crosstab(models_wx['model 1'], models_wx['model 2'], models_wx['p-val'], aggfunc='mean' )
models_wx_ct

18 18 add het
18 18 add epi
18 18 add me
18 18 het add
18 18 het epi
18 18 het me
18 18 epi add
18 18 epi het
18 18 epi me
18 18 me add
18 18 me het
18 18 me epi


model 2,add,epi,het,me
model 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
add,,0.966118,0.966118,0.030365
epi,0.966118,,0.32473,0.141518
het,0.966118,0.32473,,0.392738
me,0.030365,0.141518,0.392738,


In [10]:
nfeats_wx = pd.DataFrame(columns=['nfeat 1', 'nfeat 2',  'Stat', 'p-val'])
for i in nfeats:
    for j in nfeats:
        if i != j:
            x = ibs_df[ibs_df['nfeat']==i]['IBS']
            y = ibs_df[ibs_df['nfeat']==j]['IBS']
            res = wilcoxon(x, y)
            stat, pval = res.statistic, res.pvalue
        else:
            stat, pval = np.nan, np.nan
        nfeats_wx = pd.concat([pd.DataFrame([[i, j, stat, pval]], columns=nfeats_wx.columns), nfeats_wx], ignore_index=True)
nfeats_wx_ct = pd.crosstab(nfeats_wx['nfeat 1'], nfeats_wx['nfeat 2'], nfeats_wx['p-val'], aggfunc='mean' )
nfeats_wx_ct

nfeat 2,f100,f1000,f10000
nfeat 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
f100,,0.921809,0.207617
f1000,0.921809,,0.128027
f10000,0.207617,0.128027,


In [11]:
mafs_wx = pd.DataFrame(columns=['maf 1', 'maf 2',  'Stat', 'p-val'])
for i in mafs:
    for j in mafs:
        if i != j:
            x = ibs_df[ibs_df['maf']==i]['IBS']
            y = ibs_df[ibs_df['maf']==j]['IBS']
            res = wilcoxon(x, y)
            stat, pval = res.statistic, res.pvalue
        else:
            stat, pval = np.nan, np.nan
        mafs_wx = pd.concat([pd.DataFrame([[i, j, stat, pval]], columns=mafs_wx.columns), mafs_wx], ignore_index=True)
mafs_wx_ct = pd.crosstab(mafs_wx['maf 1'], mafs_wx['maf 2'], mafs_wx['p-val'], aggfunc='mean' )
mafs_wx_ct

maf 2,maf0.2,maf0.4
maf 1,Unnamed: 1_level_1,Unnamed: 2_level_1
maf0.2,,0.143293
maf0.4,0.143293,


In [12]:
censs

['cens0.1', 'cens0.4', 'cens0.8']

In [13]:
censs_wx = pd.DataFrame(columns=['cens 1', 'cens 2',  'Stat', 'p-val'])
for i in censs:
    for j in censs:
        if i != j:
            x = ibs_df[ibs_df['cens']==i]['IBS']
            y = ibs_df[ibs_df['cens']==j]['IBS']
            res = wilcoxon(x, y)
            stat, pval = res.statistic, res.pvalue
        else:
            stat, pval = np.nan, np.nan
        censs_wx = pd.concat([pd.DataFrame([[i, j, stat, pval]], columns=censs_wx.columns), censs_wx], ignore_index=True)
censs_wx_ct = pd.crosstab(censs_wx['cens 1'], censs_wx['cens 2'], censs_wx['p-val'], aggfunc='mean' )
censs_wx_ct

cens 2,cens0.1,cens0.4,cens0.8
cens 1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
cens0.1,,0.013779,1e-05
cens0.4,0.013779,,3e-06
cens0.8,1e-05,3e-06,
