In [193]:
import pandas as pd
import os
if "R_HOME" not in os.environ:
    os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Resources/'
import numpy as np
import rpy2.robjects.numpy2ri
import rpy2.robjects as R
from rpy2.robjects.packages import importr
rpy2.robjects.numpy2ri.activate()
import statsmodels.stats.multitest as sm

R.r('set.seed')(1)

R_STATS = importr('stats')

In [194]:
def fisher_exact_2x2(matrix, alt='greater'):
    return R_STATS.fisher_test(matrix, alternative=alt)

In [195]:
labels_train = pd.read_csv('../../evaluation_validation_set/confidence_adjusted_tables/NN_reducedV3.4_removeN5_nfeatures21_pMax0.93856484.tsv',
                    sep='\t', index_col=0)
labels_test = pd.read_csv('../../evaluation_test_set/NN_reducedV3.4_removeN5_nfeatures21_testsetEval.tsv',
                         sep='\t', index_col=0)

labels_train.columns = ['C1', 'C2', 'C3', 'C4', 'C5']
labels_train['cluster'] = labels_train.idxmax(axis=1)
labels_test['cluster'] = 'C' + labels_test['PredictedCluster'].astype(int).astype(str)

In [196]:
labels = pd.DataFrame(pd.concat([labels_train['cluster'], labels_test['cluster']]), columns = ['cluster'])
labels

Unnamed: 0,cluster
DLBCL11534,C3
DLBCL10874,C2
DLBCL11431,C5
DLBCL_C_D_PAIR13,C5
DLBCL10915,C5
...,...
DLBCL_LS2258,C2
DLBCL11579,C2
DLBCL10507,C2
DLBCL11438,C2


In [197]:
table_s1['IPI'].value_counts()

1.0    155
2.0    136
3.0    116
0.0     73
4.0     41
5.0      7
Name: IPI, dtype: int64

In [198]:
table_s1 = pd.read_csv('../../data_tables/tableS1_classifier_merged.tsv', sep='\t', index_col=0)

samples_with_os = table_s1.loc[(~table_s1['OS.status (1=dead)'].isna()) & (table_s1['R-CHOp-like\nChemo'] == 'yes')].index
samples_with_pfs = table_s1.loc[(table_s1['PFS_STAT'] == 1.0)  & (table_s1['R-CHOp-like\nChemo'] == 'yes')].index
samples_with_pfs_ipi = table_s1.loc[(table_s1['PFS_STAT'] == 1.0) & 
                                    (~table_s1['IPI'].isna()) &
                                    (table_s1['R-CHOp-like\nChemo'] == 'yes')].index


In [199]:
c1_samples = labels.loc[labels['cluster'] == 'C1'].index
c2_samples = labels.loc[labels['cluster'] == 'C2'].index
c3_samples = labels.loc[labels['cluster'] == 'C3'].index
c4_samples = labels.loc[labels['cluster'] == 'C4'].index
c5_samples = labels.loc[labels['cluster'] == 'C5'].index
c3_samples

Index(['DLBCL11534', 'DLBCL_LS1304', 'DLBCL_LS2325', 'DLBCL_MC_F606_DMJ',
       'DLBC_FF_8046_TP_NB', 'DLBCL_RICOVER_1046', 'DLBCL10536',
       'DLBCL_MC_F815_BAT', 'DLBCL_LS1395', 'DLBCL10879',
       ...
       'DLBCL11532', 'DLBCL11575', 'DLBCL_RICOVER_1013', 'DLBCL_MC_F344_CJS',
       'DLBCL11585', 'DLBCL_LS4618', 'DLBCL_LS1620', 'DLBCL11572',
       'DLBCL_MC_F358_WDS', 'DLBCL_C_D_1127_NULLPAIR'],
      dtype='object', length=113)

In [200]:
samples = pd.read_csv('../../data_tables/sample_sets/ShippStaudtSets.purity0.2.txt', sep='\t', index_col=0)

In [201]:
######
# C1 #
######
shipp_samples_c1 = samples.loc[samples['cohort'] == 'Shipp']
shipp_samples_c1 = shipp_samples_c1.loc[shipp_samples_c1.index.isin(c1_samples)].index
shipp_samples_c1_os = shipp_samples_c1[shipp_samples_c1.isin(samples_with_os)]
shipp_samples_c1_pfs = shipp_samples_c1[shipp_samples_c1.isin(samples_with_pfs)]
shipp_samples_c1_pfs_ipi = shipp_samples_c1[shipp_samples_c1.isin(samples_with_pfs_ipi)]
staudt_samples_c1 = samples.loc[samples['cohort'] != 'Shipp']
staudt_samples_c1 = staudt_samples_c1.loc[staudt_samples_c1.index.isin(c1_samples)].index
staudt_samples_c1_os = staudt_samples_c1[staudt_samples_c1.isin(samples_with_os)]
staudt_samples_c1_pfs = staudt_samples_c1[staudt_samples_c1.isin(samples_with_pfs)]
staudt_samples_c1_pfs_ipi = staudt_samples_c1[staudt_samples_c1.isin(samples_with_pfs_ipi)]

######
# C2 #
######
shipp_samples_c2 = samples.loc[samples['cohort'] == 'Shipp']
shipp_samples_c2 = shipp_samples_c2.loc[shipp_samples_c2.index.isin(c2_samples)].index
shipp_samples_c2_os = shipp_samples_c2[shipp_samples_c2.isin(samples_with_os)]
shipp_samples_c2_pfs = shipp_samples_c2[shipp_samples_c2.isin(samples_with_pfs)]
shipp_samples_c2_pfs_ipi = shipp_samples_c2[shipp_samples_c2.isin(samples_with_pfs_ipi)]
staudt_samples_c2 = samples.loc[samples['cohort'] != 'Shipp']
staudt_samples_c2 = staudt_samples_c2.loc[staudt_samples_c2.index.isin(c2_samples)].index
staudt_samples_c2_os = staudt_samples_c2[staudt_samples_c2.isin(samples_with_os)]
staudt_samples_c2_pfs = staudt_samples_c2[staudt_samples_c2.isin(samples_with_pfs)]
staudt_samples_c2_pfs_ipi = staudt_samples_c2[staudt_samples_c2.isin(samples_with_pfs_ipi)]

######
# C3 #
######
shipp_samples_c3 = samples.loc[samples['cohort'] == 'Shipp']
shipp_samples_c3 = shipp_samples_c3.loc[shipp_samples_c3.index.isin(c3_samples)].index
shipp_samples_c3_os = shipp_samples_c3[shipp_samples_c3.isin(samples_with_os)]
shipp_samples_c3_pfs = shipp_samples_c3[shipp_samples_c3.isin(samples_with_pfs)]
shipp_samples_c3_pfs_ipi = shipp_samples_c3[shipp_samples_c3.isin(samples_with_pfs_ipi)]
staudt_samples_c3 = samples.loc[samples['cohort'] != 'Shipp']
staudt_samples_c3 = staudt_samples_c3.loc[staudt_samples_c3.index.isin(c3_samples)].index
staudt_samples_c3_os = staudt_samples_c3[staudt_samples_c3.isin(samples_with_os)]
staudt_samples_c3_pfs = staudt_samples_c3[staudt_samples_c3.isin(samples_with_pfs)]
staudt_samples_c3_pfs_ipi = staudt_samples_c3[staudt_samples_c3.isin(samples_with_pfs_ipi)]

######
# C4 #
######
shipp_samples_c4 = samples.loc[samples['cohort'] == 'Shipp']
shipp_samples_c4 = shipp_samples_c4.loc[shipp_samples_c4.index.isin(c4_samples)].index
shipp_samples_c4_os = shipp_samples_c4[shipp_samples_c4.isin(samples_with_os)]
shipp_samples_c4_pfs = shipp_samples_c4[shipp_samples_c4.isin(samples_with_pfs)]
shipp_samples_c4_pfs_ipi = shipp_samples_c4[shipp_samples_c4.isin(samples_with_pfs_ipi)]
staudt_samples_c4 = samples.loc[samples['cohort'] != 'Shipp']
staudt_samples_c4 = staudt_samples_c4.loc[staudt_samples_c4.index.isin(c4_samples)].index
staudt_samples_c4_os = staudt_samples_c4[staudt_samples_c4.isin(samples_with_os)]
staudt_samples_c4_pfs = staudt_samples_c4[staudt_samples_c4.isin(samples_with_pfs)]
staudt_samples_c4_pfs_ipi = staudt_samples_c4[staudt_samples_c4.isin(samples_with_pfs_ipi)]

######
# C5 #
######
shipp_samples_c5 = samples.loc[samples['cohort'] == 'Shipp']
shipp_samples_c5 = shipp_samples_c5.loc[shipp_samples_c5.index.isin(c5_samples)].index
shipp_samples_c5_os = shipp_samples_c5[shipp_samples_c5.isin(samples_with_os)]
shipp_samples_c5_pfs = shipp_samples_c5[shipp_samples_c5.isin(samples_with_pfs)]
shipp_samples_c5_pfs_ipi = shipp_samples_c5[shipp_samples_c5.isin(samples_with_pfs_ipi)]
staudt_samples_c5 = samples.loc[samples['cohort'] != 'Shipp']
staudt_samples_c5 = staudt_samples_c5.loc[staudt_samples_c5.index.isin(c5_samples)].index
staudt_samples_c5_os = staudt_samples_c5[staudt_samples_c5.isin(samples_with_os)]
staudt_samples_c5_pfs = staudt_samples_c5[staudt_samples_c5.isin(samples_with_pfs)]
staudt_samples_c5_pfs_ipi = staudt_samples_c5[staudt_samples_c5.isin(samples_with_pfs_ipi)]

In [202]:
train_samples = pd.read_csv('../../data_tables/train_test_sets/TrainingSet_550Subset_May2021.txt', 
                        sep='\t', index_col=0, header=None).index
test_samples = pd.read_csv('../../data_tables/train_test_sets/TestingSet_149Subset_May2021.txt',
                          sep='\t', index_col=0, header=None).index

train_samples_c1 = train_samples[train_samples.isin(c1_samples)]
train_samples_c2 = train_samples[train_samples.isin(c2_samples)]
train_samples_c3 = train_samples[train_samples.isin(c3_samples)]
train_samples_c4 = train_samples[train_samples.isin(c4_samples)]
train_samples_c5 = train_samples[train_samples.isin(c5_samples)]

test_samples_c1 = test_samples[test_samples.isin(c1_samples)]
test_samples_c2 = test_samples[test_samples.isin(c2_samples)]
test_samples_c3 = test_samples[test_samples.isin(c3_samples)]
test_samples_c4 = test_samples[test_samples.isin(c4_samples)]
test_samples_c5 = test_samples[test_samples.isin(c5_samples)]

In [203]:
qval_df = pd.read_csv('../../data_tables/qval_dfs/fisher_exact_5x2.Sep_23_2022.combined.tsv', 
                      sep='\t', index_col=0)
drivers = qval_df.loc[qval_df['q'] <= 0.1].index
drivers

Index(['SV.BCL6', 'DTX1', 'CD70', 'TNFAIP3', 'NOTCH2', 'BCL10', 'B2M', 'CD58',
       'X5Q.AMP', 'FAS',
       ...
       'X6Q14.1.DEL', 'SF3B1', 'ZC3H12A', 'ATP2A2', 'X6P21.33.DEL',
       'X1Q32.1.AMP', 'BCL11A', 'IRF4', 'LYN', 'ETS1'],
      dtype='object', length=163)

In [204]:
gsm = pd.read_csv('../../data_tables/gsm/DLBCL.699.fullGSM.Sep_23_2022.tsv', sep='\t', index_col=0)
gsm.loc['PLOIDY'] = (gsm.loc['PLOIDY'].astype(float) > 2.5).astype(int)
gsm.loc['COO_ABC'] = gsm.loc['COO'].map({'ABC': 1, 'GCB': 0, 'UNC': 0, 'na': 'na'})
gsm.loc['COO_GCB'] = gsm.loc['COO'].map({'ABC': 0, 'GCB': 1, 'UNC': 0, 'na': 'na'})
gsm.loc['COO_UNC'] = gsm.loc['COO'].map({'ABC': 0, 'GCB': 0, 'UNC': 1, 'na': 'na'})
gsm = gsm.drop('COO')
gsm = gsm.drop('PURITY')
gsm = gsm.loc[~gsm.index.str.contains('CCF')]
rows = [i for i in gsm.index if i not in ['COO_ABC', 'COO_GCB', 'COO_UNC']]
for idx in rows:
    gsm.loc[idx] = gsm.loc[idx].astype(float).astype(int)
gsm.head()

Unnamed: 0_level_0,DLBCL11470,DLBCL10900,DLBC_FF_A7CQ_TP_NB,DLBCL10462,DLBCL_RICOVER_1081,DLBCL_LS1098,DLBCL_RICOVER_299,DLBCL11558,DLBCL_C_D_1105_NULLPAIR,DLBCL11447,...,DLBCL11455,DLBCL_RICOVER_685,DLBCL_LS146,DLBCL_RICOVER_111,DLBCL_RICOVER_173,DLBCL11515,DLBCL10491,DLBCL_RICOVER_1046,DLBCL10547,DLBCL10998
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
STAT3,0,0,0,0,2,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
STK33,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
OSBPL10,0,2,1,0,0,0,0,0,0,1,...,2,0,0,0,0,2,0,0,0,0
BCL11A,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
PIM1,2,2,2,2,0,0,2,0,0,2,...,0,2,0,0,0,0,0,0,2,0


In [205]:
gsm.loc['IPI_Low_01'] = ((table_s1['IPI'] == 0.0) | (table_s1['IPI'] == 1.0)).astype(int)
gsm.loc['IPI_IntermediateLow_2'] = ((table_s1['IPI'] == 2.0)).astype(int)
gsm.loc['IPI_IntermediateHigh_3'] = ((table_s1['IPI'] == 3.0)).astype(int)
gsm.loc['IPI_High_45'] = ((table_s1['IPI'] == 4.0) | (table_s1['IPI'] == 5.0)).astype(int)
gsm.loc['SV.MYC_SV.BCL2'] = ((gsm.loc['SV.MYC'] != 0) & (gsm.loc['SV.BCL2'] != 0)).astype(int)

In [206]:
c1_drivers = qval_df.loc[(qval_df['cluster'] == 'C1') & (qval_df['q'] <= 0.10)].index
c2_drivers = qval_df.loc[(qval_df['cluster'] == 'C2') & (qval_df['q'] <= 0.10)].index
c3_drivers = qval_df.loc[(qval_df['cluster'] == 'C3') & (qval_df['q'] <= 0.10)].index
c4_drivers = qval_df.loc[(qval_df['cluster'] == 'C4') & (qval_df['q'] <= 0.10)].index
c5_drivers = qval_df.loc[(qval_df['cluster'] == 'C5') & (qval_df['q'] <= 0.10)].index

In [207]:
coo_ploidy = ['COO_ABC', 'COO_GCB', 'COO_UNC', 'PLOIDY']
groups = ['Staudt', 'Shipp']
sets = [staudt_samples_c3, shipp_samples_c3]

cols = ['p', 'Shipp_wt', 'Shipp_mt', 'Staudt_wt', 'Staudt_mt',
        'p_os', 'Shipp_os_wt', 'Shipp_os_mt', 'Staudt_os_wt', 'Staudt_os_mt',
        'p_pfs', 'Shipp_pfs_wt', 'Shipp_pfs_mt', 'Staudt_pfs_wt', 'Staudt_pfs_mt',
        'p_pfs_ipi', 'Shipp_pfs_ipi_wt', 'Shipp_pfs_ipi_mt', 'Staudt_pfs_ipi_wt', 'Staudt_pfs_ipi_mt',
        'p_test_train', 'Train_wt', 'Train_mt', 'Test_wt', 'Test_mt']

stats_table_coo = np.array([[0] * len(cols)] * 4)
stats_table_coo = pd.DataFrame(stats_table_coo)
stats_table_coo.index = coo_ploidy

stats_table_coo.columns = cols

stats_table_coo_c1 = stats_table_coo.copy(deep=True)
stats_table_coo_c2 = stats_table_coo.copy(deep=True)
stats_table_coo_c3 = stats_table_coo.copy(deep=True)
stats_table_coo_c4 = stats_table_coo.copy(deep=True)
stats_table_coo_c5 = stats_table_coo.copy(deep=True)

In [208]:
coo_sets = [(stats_table_coo_c1, 
             shipp_samples_c1, staudt_samples_c1, 
             shipp_samples_c1_os, staudt_samples_c1_os,
             shipp_samples_c1_pfs, staudt_samples_c1_pfs,
             shipp_samples_c1_pfs_ipi, staudt_samples_c1_pfs_ipi,
             train_samples_c1, test_samples_c1),
            (stats_table_coo_c2, 
             shipp_samples_c2, staudt_samples_c2, 
             shipp_samples_c2_os, staudt_samples_c2_os,
             shipp_samples_c2_pfs, staudt_samples_c2_pfs,
             shipp_samples_c2_pfs_ipi, staudt_samples_c2_pfs_ipi,
             train_samples_c2, test_samples_c2),
            (stats_table_coo_c3, 
             shipp_samples_c3, staudt_samples_c3, 
             shipp_samples_c3_os, staudt_samples_c3_os,
             shipp_samples_c3_pfs, staudt_samples_c3_pfs,
             shipp_samples_c3_pfs_ipi, staudt_samples_c3_pfs_ipi,
             train_samples_c3, test_samples_c3),
            (stats_table_coo_c4, 
             shipp_samples_c4, staudt_samples_c4, 
             shipp_samples_c4_os, staudt_samples_c4_os,
             shipp_samples_c4_pfs, staudt_samples_c4_pfs,
             shipp_samples_c4_pfs_ipi, staudt_samples_c4_pfs_ipi,
             train_samples_c4, test_samples_c4),
            (stats_table_coo_c5, 
             shipp_samples_c5, staudt_samples_c5, 
             shipp_samples_c5_os, staudt_samples_c5_os,
             shipp_samples_c5_pfs, staudt_samples_c5_pfs,
             shipp_samples_c5_pfs_ipi, staudt_samples_c5_pfs_ipi,
             train_samples_c5, test_samples_c5)]

for s in coo_sets:
    curr_stats_table = s[0]
    shipp_samples_curr = s[1]
    staudt_samples_curr = s[2]
    shipp_samples_curr_os = s[3]
    staudt_samples_curr_os = s[4]
    shipp_samples_curr_pfs = s[5]
    staudt_samples_curr_pfs = s[6]
    shipp_samples_curr_pfs_ipi = s[7]
    staudt_samples_curr_pfs_ipi = s[8]
    train_samples_curr = s[9]
    test_samples_curr = s[10]
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, shipp_samples_curr]
        st_g = gsm.loc[c, staudt_samples_curr]

        sh_mt = (sh_g == 1).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g == 1).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p'] = p
        curr_stats_table.loc[c, 'Shipp_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_mt'] = st_mt

    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, shipp_samples_curr_os]
        st_g = gsm.loc[c, staudt_samples_curr_os]

        sh_mt = (sh_g == 1).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g == 1).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_os'] = p
        curr_stats_table.loc[c, 'Shipp_os_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_os_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_os_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_os_mt'] = st_mt
        
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, shipp_samples_curr_pfs]
        st_g = gsm.loc[c, staudt_samples_curr_pfs]

        sh_mt = (sh_g == 1).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g == 1).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_pfs'] = p
        curr_stats_table.loc[c, 'Shipp_pfs_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_pfs_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_pfs_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_pfs_mt'] = st_mt
        
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, shipp_samples_curr_pfs_ipi]
        st_g = gsm.loc[c, staudt_samples_curr_pfs_ipi]

        sh_mt = (sh_g == 1).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g == 1).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_pfs_ipi'] = p
        curr_stats_table.loc[c, 'Shipp_pfs_ipi_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_pfs_ipi_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_pfs_ipi_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_pfs_ipi_mt'] = st_mt
        
    for c in curr_stats_table.index:
        tr_g = gsm.loc[c, train_samples_curr]
        te_g = gsm.loc[c, test_samples_curr]

        te_mt = (te_g == 1).sum()
        te_wt = (te_g == 0).sum()
        tr_mt = (tr_g == 1).sum()
        tr_wt = (tr_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = te_mt
        m[0][1] = te_wt
        m[1][0] = tr_mt
        m[1][1] = tr_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_test_train'] = p
        curr_stats_table.loc[c, 'Train_wt'] = tr_wt
        curr_stats_table.loc[c, 'Train_mt'] = tr_mt
        curr_stats_table.loc[c, 'Test_wt'] = te_wt
        curr_stats_table.loc[c, 'Test_mt'] = te_mt

In [209]:
stats_table_coo_c3[['p', 'p_os', 'p_pfs', 'p_pfs_ipi', 'p_test_train']]

Unnamed: 0,p,p_os,p_pfs,p_pfs_ipi,p_test_train
COO_ABC,0.396798,1.0,1.0,1.0,0.33699
COO_GCB,0.14815,1.0,1.0,1.0,0.291557
COO_UNC,0.467083,1.0,1.0,1.0,1.0
PLOIDY,1.0,1.0,1.0,1.0,0.001551


In [210]:
stats_table_coo_c2

Unnamed: 0,p,Shipp_wt,Shipp_mt,Staudt_wt,Staudt_mt,p_os,Shipp_os_wt,Shipp_os_mt,Staudt_os_wt,Staudt_os_mt,...,p_pfs_ipi,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,p_test_train,Train_wt,Train_mt,Test_wt,Test_mt
COO_ABC,0.871265,34,30,46,43,0.169826,26,27,34,19,...,0.523107,12,10,7,10,1.0,62,56,18,17
COO_GCB,0.736347,40,24,58,31,0.117369,34,19,25,28,...,0.317881,13,9,13,4,0.422599,78,40,20,15
COO_UNC,1.0,54,10,74,15,1.0,46,7,47,6,...,1.0,19,3,14,3,0.198636,96,22,32,3
PLOIDY,0.156929,53,26,50,39,0.024575,46,21,25,28,...,0.201368,23,8,9,8,1.0,78,49,25,16


In [211]:
stats_table_coo_c3

Unnamed: 0,p,Shipp_wt,Shipp_mt,Staudt_wt,Staudt_mt,p_os,Shipp_os_wt,Shipp_os_mt,Staudt_os_wt,Staudt_os_mt,...,p_pfs_ipi,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,p_test_train,Train_wt,Train_mt,Test_wt,Test_mt
COO_ABC,0.396798,41,1,56,5,1.0,36,1,41,2,...,1.0,17,1,11,0,0.33699,75,6,22,0
COO_GCB,0.14815,3,39,11,50,1.0,3,34,4,39,...,1.0,2,16,1,10,0.291557,13,68,1,21
COO_UNC,0.467083,40,2,55,6,1.0,35,2,41,2,...,1.0,17,1,10,1,1.0,74,7,21,1
PLOIDY,1.0,49,3,58,3,1.0,43,3,41,2,...,1.0,19,1,11,0,0.001551,88,1,19,5


In [212]:
stats_table_coo_c4

Unnamed: 0,p,Shipp_wt,Shipp_mt,Staudt_wt,Staudt_mt,p_os,Shipp_os_wt,Shipp_os_mt,Staudt_os_wt,Staudt_os_mt,...,p_pfs_ipi,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,p_test_train,Train_wt,Train_mt,Test_wt,Test_mt
COO_ABC,0.321856,27,6,42,18,0.22448,22,5,32,2,...,0.270175,5,2,12,1,0.005536,46,23,23,1
COO_GCB,0.278279,13,20,32,28,0.249904,10,17,7,27,...,0.612616,3,4,3,10,0.002001,40,29,5,19
COO_UNC,1.0,26,7,46,14,0.738259,22,5,29,5,...,1.0,6,1,11,2,0.573716,52,17,20,4
PLOIDY,0.509133,34,5,55,5,0.476283,28,5,31,3,...,0.155844,7,2,13,0,0.719583,66,7,23,3


In [213]:
stats_table_coo_c5

Unnamed: 0,p,Shipp_wt,Shipp_mt,Staudt_wt,Staudt_mt,p_os,Shipp_os_wt,Shipp_os_mt,Staudt_os_wt,Staudt_os_mt,...,p_pfs_ipi,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,p_test_train,Train_wt,Train_mt,Test_wt,Test_mt
COO_ABC,0.80347,7,41,15,101,0.410004,6,39,10,36,...,0.41402,2,21,4,18,0.548779,17,118,5,24
COO_GCB,1.0,46,2,111,5,0.676844,43,2,42,4,...,0.488889,23,0,21,1,1.0,129,6,28,1
COO_UNC,0.768245,43,5,106,10,0.739418,41,4,40,6,...,0.665154,21,2,19,3,0.307275,124,11,25,4
PLOIDY,0.004603,64,0,103,13,0.00582,59,0,40,6,...,0.065739,31,0,19,3,0.719459,135,10,32,3


In [214]:
stats_table = np.array([[0] * len(cols)] * len(drivers))
stats_table = pd.DataFrame(stats_table)

stats_table.columns = cols
stats_table.index = drivers

stats_table.loc['SV.MYC_SV.BCL2'] = 0
stats_table.loc['IPI_Low_01'] = 0
stats_table.loc['IPI_IntermediateLow_2'] = 0
stats_table.loc['IPI_IntermediateHigh_3'] = 0
stats_table.loc['IPI_High_45'] = 0

stats_table_c1 = stats_table.copy(deep=True)
stats_table_c2 = stats_table.copy(deep=True)
stats_table_c3 = stats_table.copy(deep=True)
stats_table_c4 = stats_table.copy(deep=True)
stats_table_c5 = stats_table.copy(deep=True)

In [215]:
all_sets = [(stats_table_c1, 
             shipp_samples_c1, staudt_samples_c1, 
             shipp_samples_c1_os, staudt_samples_c1_os,
             shipp_samples_c1_pfs, staudt_samples_c1_pfs,
             shipp_samples_c1_pfs_ipi, staudt_samples_c1_pfs_ipi,
             train_samples_c1, test_samples_c1),
            (stats_table_c2, 
             shipp_samples_c2, staudt_samples_c2, 
             shipp_samples_c2_os, staudt_samples_c2_os,
             shipp_samples_c2_pfs, staudt_samples_c2_pfs,
             shipp_samples_c2_pfs_ipi, staudt_samples_c2_pfs_ipi,
             train_samples_c2, test_samples_c2),
            (stats_table_c3, 
             shipp_samples_c3, staudt_samples_c3, 
             shipp_samples_c3_os, staudt_samples_c3_os,
             shipp_samples_c3_pfs, staudt_samples_c3_pfs,
             shipp_samples_c3_pfs_ipi, staudt_samples_c3_pfs_ipi,
             train_samples_c3, test_samples_c3),
            (stats_table_c4, 
             shipp_samples_c4, staudt_samples_c4, 
             shipp_samples_c4_os, staudt_samples_c4_os,
             shipp_samples_c4_pfs, staudt_samples_c4_pfs,
             shipp_samples_c4_pfs_ipi, staudt_samples_c4_pfs_ipi,
             train_samples_c4, test_samples_c4),
            (stats_table_c5, 
             shipp_samples_c5, staudt_samples_c5, 
             shipp_samples_c5_os, staudt_samples_c5_os,
             shipp_samples_c5_pfs, staudt_samples_c5_pfs,
             shipp_samples_c5_pfs_ipi, staudt_samples_c5_pfs_ipi,
             train_samples_c5, test_samples_c5)]

for s in all_sets:
    curr_stats_table = s[0]
    curr_shipp_samples = s[1]
    curr_staudt_samples = s[2]
    curr_shipp_samples_os = s[3]
    curr_staudt_samples_os = s[4]
    curr_shipp_samples_pfs = s[5]
    curr_staudt_samples_pfs = s[6]
    curr_shipp_samples_pfs_ipi = s[7]
    curr_staudt_samples_pfs_ipi = s[8]
    train_samples_curr = s[9]
    test_samples_curr = s[10]
    
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, curr_shipp_samples]
        st_g = gsm.loc[c, curr_staudt_samples]

        sh_mt = (sh_g != 0).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g != 0).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p'] = p
        curr_stats_table.loc[c, 'Shipp_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_mt'] = st_mt

    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, curr_shipp_samples_os]
        st_g = gsm.loc[c, curr_staudt_samples_os]

        sh_mt = (sh_g != 0).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g != 0).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_os'] = p
        curr_stats_table.loc[c, 'Shipp_os_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_os_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_os_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_os_mt'] = st_mt
        
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, curr_shipp_samples_pfs]
        st_g = gsm.loc[c, curr_staudt_samples_pfs]

        sh_mt = (sh_g != 0).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g != 0).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_pfs'] = p
        curr_stats_table.loc[c, 'Shipp_pfs_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_pfs_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_pfs_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_pfs_mt'] = st_mt
        
    for c in curr_stats_table.index:
        sh_g = gsm.loc[c, curr_shipp_samples_pfs_ipi]
        st_g = gsm.loc[c, curr_staudt_samples_pfs_ipi]

        sh_mt = (sh_g != 0).sum()
        sh_wt = (sh_g == 0).sum()
        st_mt = (st_g != 0).sum()
        st_wt = (st_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = sh_mt
        m[0][1] = sh_wt
        m[1][0] = st_mt
        m[1][1] = st_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_pfs_ipi'] = p
        curr_stats_table.loc[c, 'Shipp_pfs_ipi_wt'] = sh_wt
        curr_stats_table.loc[c, 'Shipp_pfs_ipi_mt'] = sh_mt
        curr_stats_table.loc[c, 'Staudt_pfs_ipi_wt'] = st_wt
        curr_stats_table.loc[c, 'Staudt_pfs_ipi_mt'] = st_mt
        
    for c in curr_stats_table.index:
        tr_g = gsm.loc[c, train_samples_curr]
        te_g = gsm.loc[c, test_samples_curr]

        te_mt = (te_g == 1).sum()
        te_wt = (te_g == 0).sum()
        tr_mt = (tr_g == 1).sum()
        tr_wt = (tr_g == 0).sum()

        m = np.array([[0, 0]] * 2)
        m[0][0] = te_mt
        m[0][1] = te_wt
        m[1][0] = tr_mt
        m[1][1] = tr_wt

        p = fisher_exact_2x2(m, 'two.sided')[0][0]

        curr_stats_table.loc[c, 'p_test_train'] = p
        curr_stats_table.loc[c, 'Train_wt'] = tr_wt
        curr_stats_table.loc[c, 'Train_mt'] = tr_mt
        curr_stats_table.loc[c, 'Test_wt'] = te_wt
        curr_stats_table.loc[c, 'Test_mt'] = te_mt

In [216]:
stats_table_c1

Unnamed: 0,p,Shipp_wt,Shipp_mt,Staudt_wt,Staudt_mt,p_os,Shipp_os_wt,Shipp_os_mt,Staudt_os_wt,Staudt_os_mt,...,p_pfs_ipi,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,p_test_train,Train_wt,Train_mt,Test_wt,Test_mt
SV.BCL6,0.082115,10,33,38,58,0.799624,9,24,13,28,...,0.691318,3,7,6,9,1.000000,43,0,5,0
DTX1,0.702614,29,14,60,36,0.343774,23,10,24,17,...,1.000000,6,4,8,7,1.000000,74,5,15,1
CD70,1.000000,29,14,63,33,0.341786,22,11,22,19,...,0.228963,8,2,8,7,1.000000,78,0,14,0
TNFAIP3,0.251257,31,12,58,38,1.000000,23,10,28,13,...,1.000000,8,2,11,4,1.000000,78,0,11,0
NOTCH2,0.838565,32,11,69,27,1.000000,23,10,29,12,...,0.401789,8,2,9,6,1.000000,82,0,19,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SV.MYC_SV.BCL2,1.000000,43,0,96,0,1.000000,33,0,41,0,...,1.000000,10,0,15,0,1.000000,116,0,23,0
IPI_Low_01,0.235339,33,10,63,33,0.472105,23,10,25,16,...,0.228963,8,2,8,7,1.000000,80,36,16,7
IPI_IntermediateLow_2,0.609568,38,5,80,16,0.125453,30,3,31,10,...,0.050791,10,0,9,6,1.000000,98,18,20,3
IPI_IntermediateHigh_3,0.065754,33,10,86,10,0.013748,23,10,38,3,...,0.075234,5,5,13,2,0.744671,100,16,19,4


In [217]:
c1_drivers

Index(['SV.BCL6', 'DTX1', 'CD70', 'TNFAIP3', 'NOTCH2', 'BCL10', 'B2M', 'CD58',
       'X5Q.AMP', 'FAS', 'UBE2A', 'ZEB2', 'BCL7A', 'TMEM30A', 'EBF1', 'GNAI2',
       'NFKBIE', 'BCL6', 'SPEN', 'CXCR4', 'IKZF3', 'MYD88.OTHER', 'POU2F2',
       'TUBGCP5', 'SMG7', 'CCDC27', 'FADD', 'RHOA', 'TNIP1'],
      dtype='object')

In [218]:
stats_table_c1 = stats_table_c1.loc[list(c1_drivers) + ['SV.MYC_SV.BCL2', 'IPI_Low_01', 
                                                        'IPI_IntermediateLow_2', 'IPI_IntermediateHigh_3',
                                                        'IPI_High_45']]
stats_table_c2 = stats_table_c2.loc[list(c2_drivers) + ['SV.MYC_SV.BCL2', 'IPI_Low_01', 
                                                        'IPI_IntermediateLow_2', 'IPI_IntermediateHigh_3',
                                                        'IPI_High_45']]
stats_table_c3 = stats_table_c3.loc[list(c3_drivers) + ['SV.MYC_SV.BCL2', 'IPI_Low_01', 
                                                        'IPI_IntermediateLow_2', 'IPI_IntermediateHigh_3',
                                                        'IPI_High_45']]
stats_table_c4 = stats_table_c4.loc[list(c4_drivers) + ['SV.MYC_SV.BCL2', 'IPI_Low_01', 
                                                        'IPI_IntermediateLow_2', 'IPI_IntermediateHigh_3',
                                                        'IPI_High_45']]
stats_table_c5 = stats_table_c5.loc[list(c5_drivers) + ['SV.MYC_SV.BCL2', 'IPI_Low_01', 
                                                        'IPI_IntermediateLow_2', 'IPI_IntermediateHigh_3',
                                                        'IPI_High_45']]

In [219]:
stats_table_all_c1 = pd.concat([stats_table_c1, stats_table_coo_c1])
stats_table_all_c1.insert(0, 'q', sm.multipletests(stats_table_all_c1['p'], method='fdr_bh')[1])
stats_table_all_c1.insert(6, 'q_os', sm.multipletests(stats_table_all_c1['p_os'], method='fdr_bh')[1])
stats_table_all_c1.insert(12, 'q_pfs', sm.multipletests(stats_table_all_c1['p_pfs'], method='fdr_bh')[1])
stats_table_all_c1.insert(18, 'q_pfs_ipi', sm.multipletests(stats_table_all_c1['p_pfs_ipi'], method='fdr_bh')[1])
stats_table_all_c1.insert(24, 'q_test_train', sm.multipletests(stats_table_all_c1['p_test_train'], method='fdr_bh')[1])
stats_table_all_c1 = stats_table_all_c1.sort_values(by='p', ascending=True)

stats_table_all_c2 = pd.concat([stats_table_c2, stats_table_coo_c2])
stats_table_all_c2.insert(0, 'q', sm.multipletests(stats_table_all_c2['p'], method='fdr_bh')[1])
stats_table_all_c2.insert(6, 'q_os', sm.multipletests(stats_table_all_c2['p_os'], method='fdr_bh')[1])
stats_table_all_c2.insert(12, 'q_pfs', sm.multipletests(stats_table_all_c2['p_pfs'], method='fdr_bh')[1])
stats_table_all_c2.insert(18, 'q_pfs_ipi', sm.multipletests(stats_table_all_c2['p_pfs_ipi'], method='fdr_bh')[1])
stats_table_all_c2.insert(24, 'q_test_train', sm.multipletests(stats_table_all_c2['p_test_train'], method='fdr_bh')[1])
stats_table_all_c2 = stats_table_all_c2.sort_values(by='p', ascending=True)

stats_table_all_c3 = pd.concat([stats_table_c3, stats_table_coo_c3])
stats_table_all_c3.insert(0, 'q', sm.multipletests(stats_table_all_c3['p'], method='fdr_bh')[1])
stats_table_all_c3.insert(6, 'q_os', sm.multipletests(stats_table_all_c3['p_os'], method='fdr_bh')[1])
stats_table_all_c3.insert(12, 'q_pfs', sm.multipletests(stats_table_all_c3['p_pfs'], method='fdr_bh')[1])
stats_table_all_c3.insert(18, 'q_pfs_ipi', sm.multipletests(stats_table_all_c3['p_pfs_ipi'], method='fdr_bh')[1])
stats_table_all_c3.insert(24, 'q_test_train', sm.multipletests(stats_table_all_c3['p_test_train'], method='fdr_bh')[1])
stats_table_all_c3 = stats_table_all_c3.sort_values(by='p', ascending=True)

stats_table_all_c4 = pd.concat([stats_table_c4, stats_table_coo_c4])
stats_table_all_c4.insert(0, 'q', sm.multipletests(stats_table_all_c4['p'], method='fdr_bh')[1])
stats_table_all_c4.insert(6, 'q_os', sm.multipletests(stats_table_all_c4['p_os'], method='fdr_bh')[1])
stats_table_all_c4.insert(12, 'q_pfs', sm.multipletests(stats_table_all_c4['p_pfs'], method='fdr_bh')[1])
stats_table_all_c4.insert(18, 'q_pfs_ipi', sm.multipletests(stats_table_all_c4['p_pfs_ipi'], method='fdr_bh')[1])
stats_table_all_c4.insert(24, 'q_test_train', sm.multipletests(stats_table_all_c4['p_test_train'], method='fdr_bh')[1])
stats_table_all_c4 = stats_table_all_c4.sort_values(by='p', ascending=True)

stats_table_all_c5 = pd.concat([stats_table_c5, stats_table_coo_c5])
stats_table_all_c5.insert(0, 'q', sm.multipletests(stats_table_all_c5['p'], method='fdr_bh')[1])
stats_table_all_c5.insert(6, 'q_os', sm.multipletests(stats_table_all_c5['p_os'], method='fdr_bh')[1])
stats_table_all_c5.insert(12, 'q_pfs', sm.multipletests(stats_table_all_c5['p_pfs'], method='fdr_bh')[1])
stats_table_all_c5.insert(18, 'q_pfs_ipi', sm.multipletests(stats_table_all_c5['p_pfs_ipi'], method='fdr_bh')[1])
stats_table_all_c5.insert(24, 'q_test_train', sm.multipletests(stats_table_all_c5['p_test_train'], method='fdr_bh')[1])
stats_table_all_c5 = stats_table_all_c5.sort_values(by='p', ascending=True)

stats_table_all_c1.columns

Index(['q', 'p', 'Shipp_wt', 'Shipp_mt', 'Staudt_wt', 'Staudt_mt', 'q_os',
       'p_os', 'Shipp_os_wt', 'Shipp_os_mt', 'Staudt_os_wt', 'Staudt_os_mt',
       'q_pfs', 'p_pfs', 'Shipp_pfs_wt', 'Shipp_pfs_mt', 'Staudt_pfs_wt',
       'Staudt_pfs_mt', 'q_pfs_ipi', 'p_pfs_ipi', 'Shipp_pfs_ipi_wt',
       'Shipp_pfs_ipi_mt', 'Staudt_pfs_ipi_wt', 'Staudt_pfs_ipi_mt',
       'q_test_train', 'p_test_train', 'Train_wt', 'Train_mt', 'Test_wt',
       'Test_mt'],
      dtype='object')

In [220]:
stats_table_all_c5.iloc[:, 20:27]

Unnamed: 0,Shipp_pfs_ipi_wt,Shipp_pfs_ipi_mt,Staudt_pfs_ipi_wt,Staudt_pfs_ipi_mt,q_test_train,p_test_train,Train_wt
OSBPL10,28,3,16,6,1.0,1.0,108
BTG2,29,2,15,7,1.0,1.0,107
IPI_IntermediateHigh_3,15,16,18,4,1.0,0.065214,118
IRF2BP2,30,1,16,6,1.0,0.578421,114
PLOIDY,31,0,19,3,1.0,0.719459,135
TOX,31,0,18,4,1.0,1.0,131
IRF4,29,2,16,6,1.0,1.0,120
X6Q14.1.DEL,28,3,20,2,1.0,1.0,132
IPI_High_45,26,5,20,2,1.0,0.449369,137
HLA.A,25,6,16,6,1.0,1.0,121


In [221]:
stats_table_all_c1.to_csv('../../data_tables/qval_dfs/predictedC1_staudt_vs_shipp.tsv', sep='\t')
stats_table_all_c2.to_csv('../../data_tables/qval_dfs/predictedC2_staudt_vs_shipp.tsv', sep='\t')
stats_table_all_c3.to_csv('../../data_tables/qval_dfs/predictedC3_staudt_vs_shipp.tsv', sep='\t')
stats_table_all_c4.to_csv('../../data_tables/qval_dfs/predictedC4_staudt_vs_shipp.tsv', sep='\t')
stats_table_all_c5.to_csv('../../data_tables/qval_dfs/predictedC5_staudt_vs_shipp.tsv', sep='\t')

In [55]:
stats_table_all.iloc[0:8,].index

Index(['X12P13.2.DEL', 'BCL2', 'SV.BCL2', 'X6P21.1.AMP', 'FAS', 'X9P21.3.DEL',
       'SOCS1', 'BTG1'],
      dtype='object')

In [56]:
qval_df.loc[stats_table_all.iloc[0:8,].index]

Unnamed: 0,cluster,q,p,overall_frequency,C1_nf,C2_nf,C3_nf,C4_nf,C5_nf,C1_f,...,C3_wt,C4_mut,C4_wt,C5_mut,C5_wt,C1_sum,C2_sum,C3_sum,C4_sum,C5_sum
X12P13.2.DEL,C2,3.6e-05,1e-05,0.078182,0.046512,0.581395,0.093023,0.046512,0.232558,0.003636,...,91,2,73,10,129,2,25,4,2,10
BCL2,C3,3.6e-05,1e-05,0.161818,0.033708,0.146067,0.58427,0.067416,0.168539,0.005455,...,43,6,69,15,124,5,26,94,12,24
SV.BCL2,C3,3.6e-05,1e-05,0.147273,0.024691,0.185185,0.703704,0.049383,0.037037,0.003636,...,38,4,71,3,136,6,45,171,12,9
X6P21.1.AMP,C2,0.069385,0.03991,0.061818,0.176471,0.441176,0.117647,0.029412,0.235294,0.010909,...,91,1,74,8,131,7,21,6,1,10
FAS,C1,3.6e-05,1e-05,0.096364,0.433962,0.113208,0.226415,0.169811,0.056604,0.041818,...,83,9,66,3,136,46,12,24,18,6
X9P21.3.DEL,C2,3.6e-05,1e-05,0.152727,0.107143,0.464286,0.083333,0.035714,0.309524,0.016364,...,88,3,72,26,113,10,46,8,3,32
SOCS1,C3,0.00294,0.00126,0.130909,0.222222,0.138889,0.25,0.25,0.138889,0.029091,...,77,18,57,10,129,31,18,35,30,19
BTG1,C5,3.6e-05,1e-05,0.256364,0.184397,0.12766,0.070922,0.212766,0.404255,0.047273,...,85,30,45,57,82,38,29,17,50,89
