In [1]:
%load_ext autoreload
%autoreload 2

In [23]:
import os
import pickle
from glob import glob
#import re
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
import pandas as pd
#from tqdm import tqdm
from scipy import stats
#from sklearn.metrics import pairwise_distances

import settings as conf
from utils import is_number, chunker
from results.multixcan import MXPhenoInfo, MXPhenoResults

# Load S-MultiXcan results

## From Rapid GWAS project

In [3]:
_path = os.path.join(conf.SMULTIXCAN_RESULTS_DIR['RapidGWASProject'], '*.tsv.gz')
display(_path)
all_smultixcan_results_dirs = glob(_path)
display(len(all_smultixcan_results_dirs))
assert len(all_smultixcan_results_dirs) == conf.SMULTIXCAN_EXPECTED_PHENOTYPES['RapidGWASProject']

'/mnt/phenomexcan_base/results/smultixcan/rapid_gwas_project/*.tsv.gz'

4049

In [4]:
all_smultixcan_phenotypes = [MXPhenoResults(p) for p in all_smultixcan_results_dirs]
all_smultixcan_phenotypes_plain_names = pd.Index([p.pheno_info.get_plain_name() for p in all_smultixcan_phenotypes])

display(len(all_smultixcan_phenotypes))
assert len(all_smultixcan_phenotypes) == conf.SMULTIXCAN_EXPECTED_PHENOTYPES['RapidGWASProject']

4049

## From GTEx GWAS manuscript

In [5]:
_path = os.path.join(conf.SMULTIXCAN_RESULTS_DIR['GTEX_GWAS'], '*_ccn30.txt')
display(_path)
all_extra_results_dirs = glob(_path)
display(len(all_extra_results_dirs))
assert len(all_extra_results_dirs) == conf.SMULTIXCAN_EXPECTED_PHENOTYPES['GTEX_GWAS']

'/mnt/phenomexcan_base/results/smultixcan/gtex_gwas/*_ccn30.txt'

42

In [6]:
_file_pattern = '(?P<code>[^/]+)_smultixcan_imputed_gwas_gtexv8mashr_ccn30\.txt'
all_extra_phenotypes = [MXPhenoResults(p, _file_pattern) for p in all_extra_results_dirs]
all_extra_phenotypes_plain_names = pd.Index([p.pheno_info.get_plain_name() for p in all_extra_phenotypes])

display(len(all_extra_phenotypes))
assert len(all_extra_phenotypes) == conf.SMULTIXCAN_EXPECTED_PHENOTYPES['GTEX_GWAS']

42

## Run loading

This read all phenotypes results (S-MultiXcan) and saves them all into a Pandas DataFrame

In [7]:
def _get_combined_results(phenos, column):
    return {
        pheno.pheno_info.get_plain_name() :
        pheno.get_data(cols=['gene_name', column], index_col='gene_simple')[column]
        for pheno in phenos
    }

In [8]:
def _run_all(column_name, phenotype_chunks, n_jobs=20):
    print(column_name, flush=True)
    
    all_results = {}
    
    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
        tasks = [executor.submit(_get_combined_results, chunk, column_name) for chunk in phenotype_chunks]
        for future in as_completed(tasks):
            res = future.result()
            all_results.update(res)
    
    return all_results

In [9]:
# phenotype_chunks = chunker(all_smultixcan_phenotypes[:5] + all_extra_phenotypes[:5], 200)
phenotype_chunks = chunker(all_smultixcan_phenotypes + all_extra_phenotypes, 200)

In [10]:
all_results = _run_all('pvalue', phenotype_chunks)

pvalue


## Load results and save as DataFrame

In [11]:
_n_expected_phenos = np.sum(list(conf.SMULTIXCAN_EXPECTED_PHENOTYPES.values()))
display(_n_expected_phenos)
assert len(all_results) == _n_expected_phenos

4091

In [12]:
smultixcan_genes_associations = pd.DataFrame(all_results)
smultixcan_genes_associations.index.rename('gene_name', inplace=True)

assert smultixcan_genes_associations.index.is_unique

display(smultixcan_genes_associations.shape)
display(smultixcan_genes_associations.head())

(22518, 4091)

Unnamed: 0_level_0,22617_5223-Job_SOC_coding_Metal_working_production_and_maintenance_fitters,20003_1140911734-Treatmentmedication_code_ginkgo_forte_tablet,136-Number_of_operations_selfreported,I70-Diagnoses_main_ICD10_I70_Atherosclerosis,20003_1141152732-Treatmentmedication_code_mirtazapine,20002_1459-Noncancer_illness_code_selfreported_colitisnot_crohns_or_ulcerative_colitis,20002_1434-Noncancer_illness_code_selfreported_other_neurological_problem,20003_1141195224-Treatmentmedication_code_formoterol,20553_5-Methods_of_selfharm_used_Selfinjury_such_as_selfcutting_scratching_or_hitting_etc,C_CORPUS_UTERI-Malignant_neoplasm_of_corpus_uteri,...,PGC_ADHD_EUR_2017,BCAC_ER_negative_BreastCancer_EUR,SSGAC_Education_Years_Pooled,pgc.scz2,MAGNETIC_HDL.C,MAGIC_ln_FastingInsulin,Astle_et_al_2016_Sum_eosinophil_basophil_counts,SSGAC_Depressive_Symptoms,Jones_et_al_2016_SleepDuration,Astle_et_al_2016_Red_blood_cell_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.329127,0.374926,0.057807,0.633136,0.725014,0.074453,0.002245,0.31707,0.040515,0.861529,...,0.980281,0.413973,0.747822,0.609467,0.019467,0.371748,0.82301,0.71331,0.858389,0.409761
ENSG00000000457,0.257609,0.707281,0.366313,0.525725,0.231869,0.601362,0.191479,0.267365,0.552215,0.19284,...,0.228982,0.63683,0.654535,0.010907,0.7263,0.210658,0.008023,0.678749,0.836679,0.812484
ENSG00000000460,0.886257,0.610799,0.710933,0.62335,0.206562,0.840616,0.140581,0.69604,0.939619,0.446949,...,0.30146,0.140522,0.646442,0.587969,0.498724,0.521805,0.004462,0.736509,0.432229,0.486664
ENSG00000000938,0.513829,0.209518,0.497943,0.021543,0.442737,0.100253,0.733028,0.614598,0.392346,0.657822,...,0.588855,0.226977,0.576593,0.059247,0.435438,0.95316,0.101875,0.954998,0.097831,0.135045
ENSG00000000971,0.248434,0.906597,0.698141,0.281261,0.047256,0.959404,0.064124,0.856257,0.115725,0.738901,...,0.109883,0.040871,0.005662,0.020391,0.439466,0.690242,0.055059,0.00266,0.331132,0.99545


In [13]:
# Remove genes with no results
smultixcan_genes_associations = smultixcan_genes_associations.dropna(axis=0, how='all')

In [14]:
# how many entries are nan
smultixcan_genes_associations.isna().sum().sum()

1053055

In [15]:
# each UKB trait has 260 nan entries
260 * 4049

1052740

In [16]:
display(smultixcan_genes_associations.shape)

assert smultixcan_genes_associations.shape == (22515, _n_expected_phenos)

(22515, 4091)

In [17]:
# some testing

# For FinnGen
assert smultixcan_genes_associations.loc['ENSG00000110628', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 0.005086576789507484
assert smultixcan_genes_associations.loc['ENSG00000169783', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 0.3757187601354043
assert smultixcan_genes_associations.loc['ENSG00000137959', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 5.132614371931036e-07

# For ICD10
assert smultixcan_genes_associations.loc['ENSG00000135775', 'N18-Diagnoses_main_ICD10_N18_Chronic_renal_failure'] == 2.795075036067939e-05
assert smultixcan_genes_associations.loc['ENSG00000169783', 'N18-Diagnoses_main_ICD10_N18_Chronic_renal_failure'] == 0.06668736815697908
assert smultixcan_genes_associations.loc['ENSG00000174226', 'N18-Diagnoses_main_ICD10_N18_Chronic_renal_failure'] == 0.02496852053808064

# For extra phenotypes
assert smultixcan_genes_associations.loc['ENSG00000135775', 'MAGIC_ln_FastingInsulin'] == 0.08712399858507687
assert smultixcan_genes_associations.loc['ENSG00000169783', 'IMMUNOBASE_Systemic_lupus_erythematosus_hg19'] == 0.10974365378971256
assert smultixcan_genes_associations.loc['ENSG00000158691', 'pgc.scz2'] == 2.698821020217747e-28

### Remove zero pvalues

In [46]:
assert not smultixcan_genes_associations.isin([np.inf, -np.inf]).any().any()

In [47]:
smultixcan_genes_associations.isna().any().any()

True

In [48]:
all_pvals = pd.Series(smultixcan_genes_associations.values.flatten())

In [49]:
all_pvals.describe()

count    9.105581e+07
mean     4.839226e-01
std      2.934763e-01
min      0.000000e+00
25%      2.266266e-01
50%      4.794625e-01
75%      7.378647e-01
max      1.000000e+00
dtype: float64

In [50]:
_tmp = all_pvals.sort_values(ascending=True)
display(_tmp[_tmp > 0].head())

22852908    1.222488e-311
21201099    2.021948e-311
5437461     2.320165e-311
90963967    3.396667e-311
21232850    5.298136e-311
dtype: float64

In [51]:
_tmp = all_pvals.sort_values(ascending=False)
display(_tmp.head())

62629116    1.0
24177791    1.0
84385054    1.0
81529520    1.0
81545900    1.0
dtype: float64

In [52]:
# replace 0.0 pvals
smultixcan_genes_associations = smultixcan_genes_associations.replace(0.0, 1e-320)

In [53]:
all_pvals = pd.Series(smultixcan_genes_associations.values.flatten()).dropna()

In [54]:
assert (all_pvals > 0).all()
assert (all_pvals <= 1).all()

### Save

In [55]:
smultixcan_genes_associations.shape

(22515, 4091)

In [56]:
smultixcan_genes_associations.head()

Unnamed: 0_level_0,22617_5223-Job_SOC_coding_Metal_working_production_and_maintenance_fitters,20003_1140911734-Treatmentmedication_code_ginkgo_forte_tablet,136-Number_of_operations_selfreported,I70-Diagnoses_main_ICD10_I70_Atherosclerosis,20003_1141152732-Treatmentmedication_code_mirtazapine,20002_1459-Noncancer_illness_code_selfreported_colitisnot_crohns_or_ulcerative_colitis,20002_1434-Noncancer_illness_code_selfreported_other_neurological_problem,20003_1141195224-Treatmentmedication_code_formoterol,20553_5-Methods_of_selfharm_used_Selfinjury_such_as_selfcutting_scratching_or_hitting_etc,C_CORPUS_UTERI-Malignant_neoplasm_of_corpus_uteri,...,PGC_ADHD_EUR_2017,BCAC_ER_negative_BreastCancer_EUR,SSGAC_Education_Years_Pooled,pgc.scz2,MAGNETIC_HDL.C,MAGIC_ln_FastingInsulin,Astle_et_al_2016_Sum_eosinophil_basophil_counts,SSGAC_Depressive_Symptoms,Jones_et_al_2016_SleepDuration,Astle_et_al_2016_Red_blood_cell_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.329127,0.374926,0.057807,0.633136,0.725014,0.074453,0.002245,0.31707,0.040515,0.861529,...,0.980281,0.413973,0.747822,0.609467,0.019467,0.371748,0.82301,0.71331,0.858389,0.409761
ENSG00000000457,0.257609,0.707281,0.366313,0.525725,0.231869,0.601362,0.191479,0.267365,0.552215,0.19284,...,0.228982,0.63683,0.654535,0.010907,0.7263,0.210658,0.008023,0.678749,0.836679,0.812484
ENSG00000000460,0.886257,0.610799,0.710933,0.62335,0.206562,0.840616,0.140581,0.69604,0.939619,0.446949,...,0.30146,0.140522,0.646442,0.587969,0.498724,0.521805,0.004462,0.736509,0.432229,0.486664
ENSG00000000938,0.513829,0.209518,0.497943,0.021543,0.442737,0.100253,0.733028,0.614598,0.392346,0.657822,...,0.588855,0.226977,0.576593,0.059247,0.435438,0.95316,0.101875,0.954998,0.097831,0.135045
ENSG00000000971,0.248434,0.906597,0.698141,0.281261,0.047256,0.959404,0.064124,0.856257,0.115725,0.738901,...,0.109883,0.040871,0.005662,0.020391,0.439466,0.690242,0.055059,0.00266,0.331132,0.99545


In [57]:
# Save
smultixcan_genes_associations_filename = os.path.join(conf.GENE_ASSOC_DIR, f'smultixcan-mashr-pvalues.pkl.xz')
display(smultixcan_genes_associations_filename)

'/mnt/phenomexcan_base/gene_assoc/smultixcan-mashr-pvalues.pkl.xz'

In [58]:
smultixcan_genes_associations.to_pickle(smultixcan_genes_associations_filename)

### Save for publication

In [59]:
# for publication
output_file = os.path.join(conf.GENE_ASSOC_DIR, 'smultixcan-mashr-pvalues.tsv.gz')
display(output_file)

smultixcan_genes_associations.to_csv(output_file, sep='\t', float_format='%.4e')

'/mnt/phenomexcan_base/gene_assoc/smultixcan-mashr-pvalues.tsv.gz'

In [60]:
# test "for publication" file
_tmp = pd.read_csv(output_file, sep='\t', index_col='gene_name')

In [61]:
display(_tmp.shape)
assert _tmp.shape == spredixcan_genes_associations.shape

(22515, 4091)

NameError: name 'spredixcan_genes_associations' is not defined

In [None]:
_tmp.head()

In [None]:
assert not _tmp.isin([np.inf, -np.inf]).any().any()

In [None]:
assert np.array_equal(spredixcan_genes_associations.isna(), _tmp.isna())

In [None]:
_tmp.isna().any().any()

In [None]:
_tmp_flat = pd.Series(_tmp.values.flatten()).dropna()
assert ((_tmp_flat > 0) & (_tmp_flat <= 1)).all().all()

In [None]:
assert np.allclose(_tmp.values, spredixcan_genes_associations.values, atol=1e-320, rtol=1e-4, equal_nan=True)

## Save zscores

In [24]:
zscores = np.abs(stats.norm.ppf(smultixcan_genes_associations / 2))

smultixcan_genes_associations_zscores = pd.DataFrame(zscores, index=smultixcan_genes_associations.index.copy(), columns=smultixcan_genes_associations.columns.copy())

display(smultixcan_genes_associations_zscores.shape)
display(smultixcan_genes_associations_zscores.head())

  cond1 = (0 < q) & (q < 1)
  cond1 = (0 < q) & (q < 1)


(22515, 4091)

Unnamed: 0_level_0,22617_5223-Job_SOC_coding_Metal_working_production_and_maintenance_fitters,20003_1140911734-Treatmentmedication_code_ginkgo_forte_tablet,136-Number_of_operations_selfreported,I70-Diagnoses_main_ICD10_I70_Atherosclerosis,20003_1141152732-Treatmentmedication_code_mirtazapine,20002_1459-Noncancer_illness_code_selfreported_colitisnot_crohns_or_ulcerative_colitis,20002_1434-Noncancer_illness_code_selfreported_other_neurological_problem,20003_1141195224-Treatmentmedication_code_formoterol,20553_5-Methods_of_selfharm_used_Selfinjury_such_as_selfcutting_scratching_or_hitting_etc,C_CORPUS_UTERI-Malignant_neoplasm_of_corpus_uteri,...,PGC_ADHD_EUR_2017,BCAC_ER_negative_BreastCancer_EUR,SSGAC_Education_Years_Pooled,pgc.scz2,MAGNETIC_HDL.C,MAGIC_ln_FastingInsulin,Astle_et_al_2016_Sum_eosinophil_basophil_counts,SSGAC_Depressive_Symptoms,Jones_et_al_2016_SleepDuration,Astle_et_al_2016_Red_blood_cell_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.975874,0.887284,1.897155,0.477317,0.351766,1.783821,3.055707,1.000496,2.048464,0.174428,...,0.024717,0.816922,0.321512,0.510834,2.336462,0.893203,0.223675,0.367414,0.178426,0.824314
ENSG00000000457,1.13206,0.37551,0.903401,0.634545,1.195559,0.522443,1.306215,1.10915,0.594444,1.302222,...,1.202984,0.472136,0.447472,2.545653,0.350052,1.251757,2.651112,0.414171,0.206143,0.237223
ENSG00000000460,0.143042,0.508933,0.370603,0.491108,1.263074,0.201105,1.473629,0.390671,0.075748,0.760512,...,1.033308,1.473851,0.458711,0.541782,0.676499,0.640565,2.843532,0.33648,0.785382,0.695624
ENSG00000000938,0.652887,1.254892,0.67773,2.298324,0.76758,1.643629,0.3411,0.503521,0.855371,0.442922,...,0.540496,1.208182,0.558368,1.886356,0.77992,0.058739,1.635829,0.056432,1.655463,1.494501
ENSG00000000971,1.154161,0.117332,0.387832,1.077491,1.983998,0.050902,1.851317,0.181141,1.572975,0.333309,...,1.598721,2.044839,2.766758,2.319072,0.773096,0.398527,1.918412,3.004544,0.971837,0.005703


In [25]:
smultixcan_genes_associations_zscores.loc['ENSG00000158691', 'pgc.scz2']

11.031317483379759

In [26]:
# some testing

# For FinnGen
assert smultixcan_genes_associations_zscores.loc['ENSG00000110628', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 2.8014991958592232
assert smultixcan_genes_associations_zscores.loc['ENSG00000169783', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 0.8858121525410351
assert smultixcan_genes_associations_zscores.loc['ENSG00000137959', 'C_TONGUENAS-Malignant_neoplasm_of_other_and_unspecified_parts_of_tongue'] == 5.021287959552069

# For ICD10
assert smultixcan_genes_associations_zscores.loc['ENSG00000135775', 'N18-Diagnoses_main_ICD10_N18_Chronic_renal_failure'] == 4.1895505583580785

# For extra phenotypes
assert smultixcan_genes_associations_zscores.loc['ENSG00000135775', 'MAGIC_ln_FastingInsulin'] == 1.71076773175347
assert smultixcan_genes_associations_zscores.loc['ENSG00000169783', 'IMMUNOBASE_Systemic_lupus_erythematosus_hg19'] == 1.5993464052052957
assert smultixcan_genes_associations_zscores.loc['ENSG00000158691', 'pgc.scz2'] == 11.031317483379759

In [27]:
assert np.array_equal(smultixcan_genes_associations.isna(), smultixcan_genes_associations_zscores.isna())

### Remove inf values

In [34]:
smultixcan_genes_associations_zscores.isin([np.inf, -np.inf]).any().any()

True

In [35]:
max_zscores = pd.Series(smultixcan_genes_associations_zscores.values.flatten())

In [36]:
_tmp = max_zscores.sort_values(ascending=False)
display(_tmp[~np.isinf(_tmp)].head())

22852908    37.737142
21201099    37.723815
5437461     37.720171
90963967    37.710072
21232850    37.698289
dtype: float64

In [37]:
_tmp = max_zscores.sort_values(ascending=True)
display(_tmp.head())

69399716    0.0
54058455    0.0
81104040    0.0
81104067    0.0
79259031    0.0
dtype: float64

In [38]:
smultixcan_genes_associations_zscores.isna().any().any()

True

In [39]:
# replace inf
smultixcan_genes_associations_zscores = smultixcan_genes_associations_zscores.replace(np.inf, 40)
assert not smultixcan_genes_associations_zscores.isin([np.inf, -np.inf]).any().any()

### Save

In [44]:
smultixcan_genes_associations_zscores.shape

(22515, 4091)

In [45]:
smultixcan_genes_associations_zscores.head()

Unnamed: 0_level_0,22617_5223-Job_SOC_coding_Metal_working_production_and_maintenance_fitters,20003_1140911734-Treatmentmedication_code_ginkgo_forte_tablet,136-Number_of_operations_selfreported,I70-Diagnoses_main_ICD10_I70_Atherosclerosis,20003_1141152732-Treatmentmedication_code_mirtazapine,20002_1459-Noncancer_illness_code_selfreported_colitisnot_crohns_or_ulcerative_colitis,20002_1434-Noncancer_illness_code_selfreported_other_neurological_problem,20003_1141195224-Treatmentmedication_code_formoterol,20553_5-Methods_of_selfharm_used_Selfinjury_such_as_selfcutting_scratching_or_hitting_etc,C_CORPUS_UTERI-Malignant_neoplasm_of_corpus_uteri,...,PGC_ADHD_EUR_2017,BCAC_ER_negative_BreastCancer_EUR,SSGAC_Education_Years_Pooled,pgc.scz2,MAGNETIC_HDL.C,MAGIC_ln_FastingInsulin,Astle_et_al_2016_Sum_eosinophil_basophil_counts,SSGAC_Depressive_Symptoms,Jones_et_al_2016_SleepDuration,Astle_et_al_2016_Red_blood_cell_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.975874,0.887284,1.897155,0.477317,0.351766,1.783821,3.055707,1.000496,2.048464,0.174428,...,0.024717,0.816922,0.321512,0.510834,2.336462,0.893203,0.223675,0.367414,0.178426,0.824314
ENSG00000000457,1.13206,0.37551,0.903401,0.634545,1.195559,0.522443,1.306215,1.10915,0.594444,1.302222,...,1.202984,0.472136,0.447472,2.545653,0.350052,1.251757,2.651112,0.414171,0.206143,0.237223
ENSG00000000460,0.143042,0.508933,0.370603,0.491108,1.263074,0.201105,1.473629,0.390671,0.075748,0.760512,...,1.033308,1.473851,0.458711,0.541782,0.676499,0.640565,2.843532,0.33648,0.785382,0.695624
ENSG00000000938,0.652887,1.254892,0.67773,2.298324,0.76758,1.643629,0.3411,0.503521,0.855371,0.442922,...,0.540496,1.208182,0.558368,1.886356,0.77992,0.058739,1.635829,0.056432,1.655463,1.494501
ENSG00000000971,1.154161,0.117332,0.387832,1.077491,1.983998,0.050902,1.851317,0.181141,1.572975,0.333309,...,1.598721,2.044839,2.766758,2.319072,0.773096,0.398527,1.918412,3.004544,0.971837,0.005703


In [40]:
# save
smultixcan_genes_associations_zscores_filename = os.path.join(conf.GENE_ASSOC_DIR, f'smultixcan-mashr-zscores.pkl.xz')
display(smultixcan_genes_associations_zscores_filename)

'/mnt/phenomexcan_base/gene_assoc/smultixcan-mashr-zscores.pkl.xz'

In [41]:
smultixcan_genes_associations_zscores.to_pickle(smultixcan_genes_associations_zscores_filename)