In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle

import numpy as np
import pandas as pd

import settings as conf

In [3]:
# Folder that receives the classifier tables; it is created if it does not exist yet.
output_dir = os.path.join(
    conf.DELIVERABLES_DIR,
    'roc_validation',
    'classifier_tables',
)
os.makedirs(output_dir, exist_ok=True)

# Load gene mappings

In [4]:
# Load the two simplified gene-mapping objects stored under conf.GENES_METADATA_DIR.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
# these paths must only ever point at trusted pickles.
_genes_mapping_0_path = os.path.join(conf.GENES_METADATA_DIR, 'genes_mapping_simplified-0.pkl')
with open(_genes_mapping_0_path, 'rb') as mapping_file:
    genes_mapping_0 = pickle.load(mapping_file)

_genes_mapping_1_path = os.path.join(conf.GENES_METADATA_DIR, 'genes_mapping_simplified-1.pkl')
with open(_genes_mapping_1_path, 'rb') as mapping_file:
    genes_mapping_1 = pickle.load(mapping_file)

# Load fastENLOC results

In [5]:
# Path of the pickled fastENLOC results table; shown so the run records which file was read.
# The "spredixcan_" prefix is only a variable name: the file loaded here is the fastENLOC one.
spredixcan_genes_associations_filename = os.path.join(conf.GENE_ASSOC_DIR, 'fastenloc-torus-rcp.pkl.xz')
display(spredixcan_genes_associations_filename)

spredixcan_genes_associations = pd.read_pickle(spredixcan_genes_associations_filename)

'/mnt/phenomexcan_base/gene_assoc/fastenloc-torus-rcp.pkl.xz'

In [6]:
#spredixcan_genes_associations = spredixcan_genes_associations.rename(index=genes_mapping_1)

In [7]:
# Dimensions of the fastENLOC table (rows are gene ids, columns are traits).
spredixcan_genes_associations.shape

(38062, 4091)

In [8]:
# Peek at the first rows; many gene/trait cells carry no value.
spredixcan_genes_associations.head(5)

Unnamed: 0_level_0,O46-Diagnoses_main_ICD10_O46_Antepartum_haemorrhage_not_elsewhere_classified,K30-Diagnoses_main_ICD10_K30_Dyspepsia,2907-Ever_stopped_smoking_for_6_months,H7_DIPLOPIA-Diplopia,1538_0-Major_dietary_changes_in_the_last_5_years_No,5663-Length_of_longest_manicirritable_episode,20002_1538-Noncancer_illness_code_selfreported_arthritis_nos,S30-Diagnoses_main_ICD10_S30_Superficial_injury_of_abdomen_lower_back_and_pelvis,24010_raw-Inverse_distance_to_the_nearest_road,3143_raw-Ankle_spacing_width,...,2237-Plays_computer_games,20002_1461-Noncancer_illness_code_selfreported_inflammatory_bowel_disease,20002_1508-Noncancer_illness_code_selfreported_jaundice_unknown_cause,20003_1140881882-Treatmentmedication_code_timoptol_025_eye_drops,22601_71253330-Job_coding_merchandiser_window_dresser,23112_raw-Leg_fat_mass_right,20003_1140861778-Treatmentmedication_code_dipyridamole,20003_1199-Treatmentmedication_code_food_supplementplantherbal_extract,1309-Fresh_fruit_intake,100920_2105-Type_milk_consumed_soya_with_calcium
gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,,,,,0.001213,,,,,,...,,,,,,,,,0.002131,
ENSG00000000457,,,,,0.001873,,,,,0.01772,...,0.068852,,,,,0.003383,,,0.009195,
ENSG00000000460,,,,,0.00262,,,0.000207,,0.000105,...,0.093284,,,,,0.00879,,,0.003105,
ENSG00000000938,,,,,0.002928,,,,0.000762,0.012773,...,0.0043,,,0.000424,,0.000612,,,0.00324,
ENSG00000000971,,,,,0.002858,,,,,0.019304,...,0.005419,,,,,0.007427,,,0.004804,


# Load OMIM silver standard

In [9]:
# Tab-separated OMIM silver standard: one row per trait / phenotype MIM / gene record.
omim_silver_standard = pd.read_csv(os.path.join(conf.DATA_DIR, 'omim_silver_standard.tsv'), sep='\t')

In [10]:
# Keep only records that have an Ensembl gene id, a trait and a phenotype MIM number.
omim_silver_standard = omim_silver_standard.dropna(subset=['ensembl_gene_id', 'trait', 'pheno_mim'])

In [11]:
# Size and first rows of the cleaned silver standard.
display(omim_silver_standard.shape)
display(omim_silver_standard.head())

(7809, 7)

Unnamed: 0,trait,pheno_mim,mim,entry_type,entrez_gene_id,approved_gene_symbol,ensembl_gene_id
0,M41-Diagnoses_main_ICD10_M41_Scoliosis,101800,188830,gene,5573,PRKAR1A,ENSG00000108946
1,M41-Diagnoses_main_ICD10_M41_Scoliosis,102500,600275,gene,4853,NOTCH2,ENSG00000134250
2,M41-Diagnoses_main_ICD10_M41_Scoliosis,105830,601623,gene,7337,UBE3A,ENSG00000114062
3,M41-Diagnoses_main_ICD10_M41_Scoliosis,108120,190990,gene,7169,TPM2,ENSG00000198467
4,M41-Diagnoses_main_ICD10_M41_Scoliosis,108145,613629,gene,63895,PIEZO2,ENSG00000154864


# Read gwas2gene results

In [12]:
from glob import glob

import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
pandas2ri.activate()

In [13]:
# Handle to R's readRDS, used below to read the per-trait .rds result files.
readRDS = robjects.r['readRDS']

In [14]:
# Collect the per-trait gwas2gene result files. glob() returns paths in
# arbitrary, filesystem-dependent order, so they are sorted to keep the
# processing order (and any per-file log output) reproducible across runs.
f_files = sorted(glob(os.path.join(conf.OMIM_SILVER_STANDARD_GWAS_TO_GENE_DIR, '*.rds')))
display(len(f_files))

# The file count is compared with the number of distinct silver-standard traits;
# a mismatch is only reported, it does not stop the notebook.
_n_expected_files = len(omim_silver_standard['trait'].unique())
if len(f_files) != _n_expected_files:
    print(f'WARNING: some files are not there. {_n_expected_files} expected, {len(f_files)} found.')

99



In [15]:
# Map each trait code (the .rds file name without its extension) to the genes
# listed in that file, restricted to genes present in the fastENLOC table.
gwas2genes_results = {}

for rds_path in f_files:
    f_code = os.path.splitext(os.path.basename(rds_path))[0]
    rds_contents = readRDS(rds_path)

    # NOTE(review): element 1 of the RDS object is treated as a container whose
    # first entry is labelled by gene id — confirm against the gwas2gene output.
    gene_container = rds_contents[1]
    if len(gene_container) == 0:
        print(f'{f_code}: empty')
        reported_genes = set()
    else:
        reported_genes = set(gene_container[0].iter_labels())

    gwas2genes_results[f_code] = spredixcan_genes_associations.index.intersection(reported_genes)

In [16]:
# Number of trait codes for which a result file was read.
len(gwas2genes_results)

99

In [17]:
# Pool the per-trait gene lists: first with repeats (a gene counts once per
# trait it is listed for), then collapsed to the set of distinct genes.
_pooled_gene_hits = [
    gene_id
    for trait_gene_index in gwas2genes_results.values()
    for gene_id in trait_gene_index
]
display(len(_pooled_gene_hits))

gwas2gene_all_genes = set(_pooled_gene_hits)
display(len(gwas2gene_all_genes))

# gwas2gene_all_genes = spredixcan_genes_associations.index.intersection(gwas2gene_all_genes)
# display(len(gwas2gene_all_genes))

19709

9722

# Create list of UKB-OMIM traits

In [18]:
# Reminder of the silver-standard layout before deriving the trait list from it.
omim_silver_standard.head()

Unnamed: 0,trait,pheno_mim,mim,entry_type,entrez_gene_id,approved_gene_symbol,ensembl_gene_id
0,M41-Diagnoses_main_ICD10_M41_Scoliosis,101800,188830,gene,5573,PRKAR1A,ENSG00000108946
1,M41-Diagnoses_main_ICD10_M41_Scoliosis,102500,600275,gene,4853,NOTCH2,ENSG00000134250
2,M41-Diagnoses_main_ICD10_M41_Scoliosis,105830,601623,gene,7337,UBE3A,ENSG00000114062
3,M41-Diagnoses_main_ICD10_M41_Scoliosis,108120,190990,gene,7169,TPM2,ENSG00000198467
4,M41-Diagnoses_main_ICD10_M41_Scoliosis,108145,613629,gene,63895,PIEZO2,ENSG00000154864


# Create fastENLOC classifier table

In [19]:
# Distinct traits of the silver standard, in order of first appearance.
_trait_gene_pairs = omim_silver_standard.loc[:, ['trait', 'ensembl_gene_id']]
ukb_traits_common = _trait_gene_pairs['trait'].unique()

# One row per unique (trait, gene) pair, flagged with omim_value=1 and indexed
# by that pair so it can be matched against the classifier table's index.
omim_true_classes = (
    _trait_gene_pairs
    .drop_duplicates()
    .assign(omim_value=1)
    .set_index(['trait', 'ensembl_gene_id'])
)

In [20]:
# Number of distinct traits in the silver standard.
len(ukb_traits_common)

107

In [21]:
# Number of unique (trait, gene) pairs in the silver standard.
omim_true_classes.shape

(7046, 1)

In [22]:
# First silver-standard pairs, now indexed by (trait, ensembl_gene_id).
omim_true_classes.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,omim_value
trait,ensembl_gene_id,Unnamed: 2_level_1
M41-Diagnoses_main_ICD10_M41_Scoliosis,ENSG00000108946,1
M41-Diagnoses_main_ICD10_M41_Scoliosis,ENSG00000134250,1
M41-Diagnoses_main_ICD10_M41_Scoliosis,ENSG00000114062,1
M41-Diagnoses_main_ICD10_M41_Scoliosis,ENSG00000198467,1
M41-Diagnoses_main_ICD10_M41_Scoliosis,ENSG00000154864,1


In [23]:
# Distinct silver-standard traits (to compare with the number of result sets).
len(ukb_traits_common)

107

In [24]:
# Total number of genes summed over all per-trait result sets; compare with the
# number of index tuples built below.
np.sum([len(k) for k in gwas2genes_results.values()])

19709

In [25]:
from entity import Trait

In [26]:
# Candidate (trait, gene) pairs: every gene in a trait's result set, keyed by
# the trait's full code. Traits whose code has no result set contribute nothing.
index_tuples = [
    (full_trait_code, gene_id)
    for full_trait_code in ukb_traits_common
    for gene_id in gwas2genes_results.get(Trait(full_code=full_trait_code).code, ())
]

In [27]:
# Number of candidate (trait, gene) pairs.
len(index_tuples)

19709

In [28]:
# First few pairs: (full trait code, Ensembl gene id).
index_tuples[:5]

[('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000012504'),
 ('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000075089'),
 ('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000075336'),
 ('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000078401'),
 ('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000090263')]

In [29]:
# Two-level index over the candidate pairs: 'ukb_efo' holds the full trait code
# and 'gene' the Ensembl gene id. Order follows index_tuples (not sorted).
classifier_index = pd.MultiIndex.from_tuples(
    index_tuples,
    names=['ukb_efo', 'gene']
)

In [30]:
# Distinct genes across all traits, for reference next to the index size.
len(gwas2gene_all_genes)

9722

In [31]:
# Number of entries in the classifier index.
classifier_index.shape

(19709,)

In [32]:
# Empty classifier table: one row per (trait, gene) pair; the three columns are filled in below.
predixcan_classifier_df = pd.DataFrame(index=classifier_index, columns=['score', 'predicted_class', 'true_class'])

In [33]:
# Sort rows by (ukb_efo, gene); later column assignments align on the index, not on position.
predixcan_classifier_df = predixcan_classifier_df.sort_index()

In [34]:
# Rows x columns of the still-empty classifier table.
predixcan_classifier_df.shape

(19709, 3)

In [35]:
# Default every pair to the negative class; silver-standard pairs are set to 1 below.
predixcan_classifier_df['true_class'] = 0

In [36]:
# Check the table after initialising true_class.
predixcan_classifier_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1200-Sleeplessness_insomnia,ENSG00000001617,,,0
1200-Sleeplessness_insomnia,ENSG00000003756,,,0
1200-Sleeplessness_insomnia,ENSG00000004534,,,0
1200-Sleeplessness_insomnia,ENSG00000004838,,,0
1200-Sleeplessness_insomnia,ENSG00000004897,,,0


In [37]:
# Collapse the single-column frame into a Series indexed by (trait, ensembl_gene_id).
true_classes = omim_true_classes.squeeze()
display(true_classes.shape)
display(true_classes.head())

(7046,)

trait                                   ensembl_gene_id
M41-Diagnoses_main_ICD10_M41_Scoliosis  ENSG00000108946    1
                                        ENSG00000134250    1
                                        ENSG00000114062    1
                                        ENSG00000198467    1
                                        ENSG00000154864    1
Name: omim_value, dtype: int64

In [38]:
# Pairs that are both candidates and present in the silver standard become positives.
_silver_standard_pairs = predixcan_classifier_df.index.intersection(true_classes.index)
predixcan_classifier_df.loc[_silver_standard_pairs, 'true_class'] = 1

In [39]:
# Every row must carry a label after the two assignments above.
assert not predixcan_classifier_df['true_class'].isna().any()

In [40]:
# Spot check: first rows of the table for the scoliosis trait.
predixcan_classifier_df.loc[('M41-Diagnoses_main_ICD10_M41_Scoliosis',)].head()

Unnamed: 0_level_0,score,predicted_class,true_class
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000012504,,,0
ENSG00000075089,,,0
ENSG00000075336,,,0
ENSG00000078401,,,0
ENSG00000090263,,,0


In [41]:
# A (trait, gene) pair looked up in the silver standard; used as the positive example in the asserts below.
true_classes.loc[('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000112234')]

1

In [42]:
# A candidate gene for the same trait that is absent from the silver standard (negative example).
'ENSG00000090263' not in true_classes.loc['M41-Diagnoses_main_ICD10_M41_Scoliosis'].index

True

In [43]:
# The positive example must be labelled 1 and the negative example 0 in the classifier table.
assert predixcan_classifier_df.loc[('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000112234'), 'true_class'] == 1.0
assert predixcan_classifier_df.loc[('M41-Diagnoses_main_ICD10_M41_Scoliosis', 'ENSG00000090263'), 'true_class'] == 0.0

In [44]:
# Distinct genes across all traits (unchanged by the labelling steps).
len(gwas2gene_all_genes)

9722

In [45]:
# score
# Start from an all-NaN series over every (trait, gene) pair and fill it in
# one trait at a time with the values stored in the fastENLOC table.
df_score = pd.Series(index=classifier_index, dtype=float)

for trait in ukb_traits_common:
    trait_code = Trait(full_code=trait).code
    trait_genes = gwas2genes_results.get(trait_code)

    if trait_genes is None:
        # No result set for this trait: report its code and move on.
        print(trait_code)
    elif len(trait_genes) == 0:
        # A result set exists but holds no genes, so there is no row to fill.
        print(f'Empty: {trait}')
    else:
        # Positional assignment: relies on classifier_index listing this
        # trait's genes in the same order as gwas2genes_results[trait_code].
        df_score.loc[trait] = spredixcan_genes_associations.loc[trait_genes, trait].values

M13
I95
I71
20002_1264
20002_1081
22130
C80
20002_1538


In [46]:
# fastENLOC discards results with RCP < 1e-04, so a missing value is scored
# as 0.0; the series is then sorted by (ukb_efo, gene).
df_score = df_score.fillna(0.0).sort_index()
assert not df_score.isna().any()

In [47]:
# First scores after filling and sorting.
df_score.head()

ukb_efo                      gene           
1200-Sleeplessness_insomnia  ENSG00000001617    0.021790
                             ENSG00000003756    0.060830
                             ENSG00000004534    0.395645
                             ENSG00000004838    0.000435
                             ENSG00000004897    0.000000
dtype: float64

In [48]:
# Number of distinct traits that have at least one scored pair.
df_score.index.get_level_values('ukb_efo').unique().shape

(99,)

In [49]:
# Spot check: a pair with no value in the fastENLOC table must end up with score 0.0.
_gene, _trait = ('ENSG00000090263', 'M41-Diagnoses_main_ICD10_M41_Scoliosis')
assert pd.isnull(spredixcan_genes_associations.loc[_gene, _trait])
assert df_score.loc[_trait, _gene] == 0.0

In [50]:
# Same check on a second trait: missing in the source table, 0.0 in df_score.
_gene, _trait = ('ENSG00000070061', 'O14-Diagnoses_main_ICD10_O14_Gestational_pregnancyinduced_hypertension_with_significant_proteinuria')
assert pd.isnull(spredixcan_genes_associations.loc[_gene, _trait])
assert df_score.loc[_trait, _gene] == 0.0

In [51]:
# The score series should be floating point.
df_score.dtype

dtype('float64')

In [52]:
# One score per candidate (trait, gene) pair.
df_score.shape

(19709,)

In [53]:
# Another look at the first scores.
df_score.head()

ukb_efo                      gene           
1200-Sleeplessness_insomnia  ENSG00000001617    0.021790
                             ENSG00000003756    0.060830
                             ENSG00000004534    0.395645
                             ENSG00000004838    0.000435
                             ENSG00000004897    0.000000
dtype: float64

In [54]:
# Distribution summary of the scores (count, quartiles, extremes).
df_score.describe()

count    19709.000000
mean         0.023897
std          0.112420
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000213
max          1.742724
dtype: float64

In [55]:
# Attach the scores to the classifier table; assign() aligns df_score on the (ukb_efo, gene) index.
predixcan_classifier_df = predixcan_classifier_df.assign(score=df_score)

In [56]:
# assert not predixcan_classifier_df['score'].isna().any()

In [57]:
from scipy import stats

In [58]:
# Number of distinct genes across all traits; only displayed here, not used in the visible cells below.
_n_genes = len(gwas2gene_all_genes)
display(_n_genes)

# Cutoff applied to the score column to derive predicted_class (score > threshold).
SCORE_THRESHOLD = 0.1
display(SCORE_THRESHOLD)

9722

0.1

In [59]:
# A pair is predicted positive when its score is strictly above SCORE_THRESHOLD.
_above_threshold = predixcan_classifier_df['score'] > SCORE_THRESHOLD
predixcan_classifier_df = predixcan_classifier_df.assign(
    predicted_class=_above_threshold.astype(int)
)

In [60]:
# Table size after all three columns are populated.
predixcan_classifier_df.shape

(19709, 3)

In [61]:
# First rows with score, predicted_class and true_class filled in.
predixcan_classifier_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1200-Sleeplessness_insomnia,ENSG00000001617,0.02179,0,0
1200-Sleeplessness_insomnia,ENSG00000003756,0.06083,0,0
1200-Sleeplessness_insomnia,ENSG00000004534,0.395645,1,0
1200-Sleeplessness_insomnia,ENSG00000004838,0.000435,0,0
1200-Sleeplessness_insomnia,ENSG00000004897,0.0,0,0


In [62]:
# Scoliosis rows with the silver-standard positives listed first.
predixcan_classifier_df.loc['M41-Diagnoses_main_ICD10_M41_Scoliosis'].sort_values('true_class', ascending=False).head()

Unnamed: 0_level_0,score,predicted_class,true_class
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000157764,0.0,0,1
ENSG00000162769,0.0,0,1
ENSG00000112234,0.0,0,1
ENSG00000012504,0.0,0,0
ENSG00000173208,0.0,0,0


## Select genes per trait

In [63]:
# No per-trait gene selection is applied: the name below is bound to the full table (same object, not a copy).
#selected_predixcan_classifier_df = predixcan_classifier_df.loc[predixcan_classifier_df.index.intersection(trait_genes_to_keep)]
selected_predixcan_classifier_df = predixcan_classifier_df

In [64]:
# some testing

In [65]:
# Size of the selected table (identical to the full table, since nothing was filtered).
selected_predixcan_classifier_df.shape

(19709, 3)

In [66]:
# First rows of the selected table.
selected_predixcan_classifier_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1200-Sleeplessness_insomnia,ENSG00000001617,0.02179,0,0
1200-Sleeplessness_insomnia,ENSG00000003756,0.06083,0,0
1200-Sleeplessness_insomnia,ENSG00000004534,0.395645,1,0
1200-Sleeplessness_insomnia,ENSG00000004838,0.000435,0,0
1200-Sleeplessness_insomnia,ENSG00000004897,0.0,0,0


In [67]:
# Rows predicted positive, listed first.
selected_predixcan_classifier_df.sort_values('predicted_class', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20002_1065-Noncancer_illness_code_selfreported_hypertension,ENSG00000103510,0.786094,1,0
20002_1111-Noncancer_illness_code_selfreported_asthma,ENSG00000107485,0.859,1,0
20002_1226-Noncancer_illness_code_selfreported_hypothyroidismmyxoedema,ENSG00000198400,0.777269,1,0
1980-Worrier_anxious_feelings,ENSG00000154319,0.1691,1,0
20002_1466-Noncancer_illness_code_selfreported_gout,ENSG00000175727,0.3878,1,0


In [68]:
# Order rows so silver-standard positives come first (then by trait, descending),
# and show how many positives there are along with a sample of them.
_tmp = selected_predixcan_classifier_df.sort_values(['true_class', 'ukb_efo'], ascending=False)
_positive_rows = _tmp[_tmp['true_class'] > 0]

display(_tmp.shape)
display(_positive_rows.shape)
display(_positive_rows.head())

(19709, 3)

(126, 3)

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
R69-Diagnoses_main_ICD10_R69_Unknown_and_unspecified_causes_of_morbidity,ENSG00000068305,0.0,0,1
R69-Diagnoses_main_ICD10_R69_Unknown_and_unspecified_causes_of_morbidity,ENSG00000140443,0.0,0,1
R40-Diagnoses_main_ICD10_R40_Somnolence_stupor_and_coma,ENSG00000133812,0.0,0,1
R40-Diagnoses_main_ICD10_R40_Somnolence_stupor_and_coma,ENSG00000183287,0.0,0,1
N20-Diagnoses_main_ICD10_N20_Calculus_of_kidney_and_ureter,ENSG00000131183,0.02435,0,1


### Test classes

In [69]:
# Number of distinct traits in the final table.
selected_predixcan_classifier_df.index.get_level_values('ukb_efo').unique().shape

(99,)

In [70]:
# Number of distinct genes in the final table.
selected_predixcan_classifier_df.index.get_level_values('gene').unique().shape

(9722,)

In [71]:
# Trait used for the manual checks in the following cells.
_pheno = 'N20-Diagnoses_main_ICD10_N20_Calculus_of_kidney_and_ureter'

In [72]:
# Distinct silver-standard genes linked to _pheno.
# NOTE(review): despite the "clinvar_asthma" name, the genes come from the OMIM
# silver standard and belong to whatever trait _pheno holds.
_clinvar_asthma_genes = omim_silver_standard[omim_silver_standard['trait'] == _pheno]['ensembl_gene_id'].unique()
display(_clinvar_asthma_genes)
display(_clinvar_asthma_genes.shape)

array(['ENSG00000075891', 'ENSG00000130600', 'ENSG00000269821',
       'ENSG00000129757', 'ENSG00000143473', 'ENSG00000275410',
       'ENSG00000019186', 'ENSG00000134371', 'ENSG00000036828',
       'ENSG00000088256', 'ENSG00000187091', 'ENSG00000122194',
       'ENSG00000138592', 'ENSG00000040531', 'ENSG00000138079',
       'ENSG00000021488', 'ENSG00000090402', 'ENSG00000131482',
       'ENSG00000137700', 'ENSG00000164007', 'ENSG00000113946',
       'ENSG00000116039', 'ENSG00000168000', 'ENSG00000123191',
       'ENSG00000171365', 'ENSG00000165704', 'ENSG00000042753',
       'ENSG00000075643', 'ENSG00000169692', 'ENSG00000109667',
       'ENSG00000131183', 'ENSG00000109062', 'ENSG00000149257',
       'ENSG00000198931', 'ENSG00000157388', 'ENSG00000167207',
       'ENSG00000124827', 'ENSG00000134873'], dtype=object)

(38,)

In [73]:
# Silver-standard genes of _pheno that also appear among its rows in the classifier table.
_tmp = selected_predixcan_classifier_df.loc[_pheno]
_tmp.loc[_tmp.index.intersection(_clinvar_asthma_genes)]

Unnamed: 0,score,predicted_class,true_class
ENSG00000131183,0.02435,0,1


In [74]:
# All classifier rows for _pheno.
# NOTE(review): the name mentions asthma, but the rows are for the trait held in _pheno.
_predixcan_asthma_genes = selected_predixcan_classifier_df.loc[_pheno]

In [75]:
# First classifier rows for _pheno.
_predixcan_asthma_genes.head()

Unnamed: 0_level_0,score,predicted_class,true_class
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSG00000005187,0.0,0,0
ENSG00000005189,0.0,0,0
ENSG00000011638,0.0,0,0
ENSG00000027847,0.0,0,0
ENSG00000048140,0.0,0,0


In [76]:
# Table size before looking at the class balance.
selected_predixcan_classifier_df.shape

(19709, 3)

In [77]:
# How many pairs are predicted negative (0) versus positive (1).
selected_predixcan_classifier_df['predicted_class'].value_counts()

0    18730
1      979
Name: predicted_class, dtype: int64

In [78]:
# How many pairs are silver-standard negatives (0) versus positives (1).
selected_predixcan_classifier_df['true_class'].value_counts()

0    19583
1      126
Name: true_class, dtype: int64

In [79]:
# Whole table with silver-standard positives first (no .head(), so the full frame is rendered).
selected_predixcan_classifier_df.sort_values(['true_class'], ascending=[False])

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20002_1065-Noncancer_illness_code_selfreported_hypertension,ENSG00000179218,0.000679,0,1
20002_1065-Noncancer_illness_code_selfreported_hypertension,ENSG00000111252,0.069440,0,1
E66-Diagnoses_main_ICD10_E66_Obesity,ENSG00000176842,0.000000,0,1
20002_1065-Noncancer_illness_code_selfreported_hypertension,ENSG00000111057,0.000000,0,1
22127-Doctor_diagnosed_asthma,ENSG00000232810,0.000000,0,1
20002_1111-Noncancer_illness_code_selfreported_asthma,ENSG00000124299,0.000000,0,1
20002_1065-Noncancer_illness_code_selfreported_hypertension,ENSG00000125730,0.000602,0,1
20002_1456-Noncancer_illness_code_selfreported_malabsorptioncoeliac_disease,ENSG00000196735,0.000000,0,1
R40-Diagnoses_main_ICD10_R40_Somnolence_stupor_and_coma,ENSG00000133812,0.000000,0,1
M05-Diagnoses_main_ICD10_M05_Seropositive_rheumatoid_arthritis,ENSG00000204498,0.000255,0,1


# Save classifier table

In [80]:
# Drop any row that has a missing value in score, predicted_class or true_class
# before the table is written out.
selected_predixcan_classifier_df = selected_predixcan_classifier_df.dropna()

In [81]:
# First rows of the table that will be saved.
selected_predixcan_classifier_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,score,predicted_class,true_class
ukb_efo,gene,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1200-Sleeplessness_insomnia,ENSG00000001617,0.02179,0,0
1200-Sleeplessness_insomnia,ENSG00000003756,0.06083,0,0
1200-Sleeplessness_insomnia,ENSG00000004534,0.395645,1,0
1200-Sleeplessness_insomnia,ENSG00000004838,0.000435,0,0
1200-Sleeplessness_insomnia,ENSG00000004897,0.0,0,0


In [82]:
# Size after dropna(); compare with the earlier shape to see whether any row was removed.
selected_predixcan_classifier_df.shape

(19709, 3)

In [83]:
# Write the classifier table as a tab-separated file under output_dir.
# NOTE(review): index=False omits the (ukb_efo, gene) index levels, so the file
# holds only score, predicted_class and true_class — confirm that downstream
# consumers do not need the trait / gene identifiers.
_classifier_table_path = os.path.join(output_dir, 'fastenloc-torus-classifier_data.tsv.gz')
selected_predixcan_classifier_df.to_csv(_classifier_table_path, sep='\t', index=False)