In [1]:
# Reload edited project modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle
from glob import glob
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np
import pandas as pd
from scipy import stats

import settings as conf
import metadata
from utils import is_number, chunker
from results.multixcan import MXPhenoInfo, MXPhenoResults

In [3]:
# Create the web-app output directory up front; exist_ok=True keeps re-runs from failing.
os.makedirs(conf.WEBAPP_DIR, exist_ok=True)

# Gene mappings

In [4]:
# Preview the gene metadata table that the lookup dictionaries in the following cells are built from.
metadata.GENES_MAPPINGS.head()

Unnamed: 0,gene,gene_name,gene_type,gene_id,band
0,ENSG00000000457.13,SCYL3,protein_coding,ENSG00000000457,1q24.2
1,ENSG00000000460.16,C1orf112,protein_coding,ENSG00000000460,1q24.2
2,ENSG00000000938.12,FGR,protein_coding,ENSG00000000938,1p35.3
3,ENSG00000000971.15,CFH,protein_coding,ENSG00000000971,1q31.3
4,ENSG00000001036.13,FUCA2,protein_coding,ENSG00000001036,6q24.2


In [5]:
# Lookup: gene symbol -> versioned Ensembl gene id (the 'gene' column).
gene_name_to_id_long = metadata.GENES_MAPPINGS.set_index('gene_name')['gene'].to_dict()

In [6]:
# Spot-check the symbol -> id lookup with a known gene symbol.
gene_name_to_id_long['A2M']

'ENSG00000175899.14'

In [7]:
# Lookup: versioned Ensembl gene id -> gene symbol (inverse of the symbol -> id map).
gene_id_long_to_name = metadata.GENES_MAPPINGS.set_index('gene')['gene_name'].to_dict()

In [8]:
# Spot-check the id -> symbol lookup with a known versioned id.
gene_id_long_to_name['ENSG00000175899.14']

'A2M'

In [9]:
# Lookup: versioned Ensembl gene id -> value of the 'band' column (e.g. '1q24.2').
gene_id_long_to_band = metadata.GENES_MAPPINGS.set_index('gene')['band'].to_dict()

In [10]:
# Spot-check the id -> band lookup with a known versioned id.
gene_id_long_to_band['ENSG00000175899.14']

'12p13.31'

# Load UKB x ClinVar z2

In [11]:
# Path of the UKB x ClinVar z2 matrix (.pkl.xz) inside the gene-association directory.
ukb_clinvar_z2_filename = os.path.join(conf.GENE_ASSOC_DIR, 'smultixcan_and_clinvar-z2.pkl.xz')
display(ukb_clinvar_z2_filename)

'/mnt/phenomexcan_base/gene_assoc/smultixcan_and_clinvar-z2.pkl.xz'

In [12]:
# NOTE(review): unpickling can execute arbitrary code; only load files produced by a trusted pipeline.
ukb_clinvar_z2 = pd.read_pickle(ukb_clinvar_z2_filename)

In [13]:
# Dimensions of the z2 matrix: rows are PhenomeXcan traits, columns are ClinVar traits.
ukb_clinvar_z2.shape

(4091, 5106)

In [14]:
# Preview the first rows of the z2 matrix.
ukb_clinvar_z2.head()

Unnamed: 0_level_0,Alzheimer's disease,Alpha-2-macroglobulin deficiency,"Otitis media, susceptibility to",p phenotype,Glucocorticoid deficiency with achalasia,Keratosis palmoplantaris papulosa,"Charcot-Marie-Tooth disease, type 2N","Epileptic encephalopathy, early infantile, 29",Combined oxidative phosphorylation deficiency 8,"Leukoencephalopathy, progressive, with ovarian failure",...,Retinitis pigmentosa 58,"Myopia 21, autosomal dominant",Paget disease of bone 6,Seborrhea-like dermatitis with psoriasiform elements,PEHO syndrome,Oocyte maturation defect 1,OOCYTE MATURATION DEFECT 6,OOCYTE MATURATION DEFECT 3,Acromelic frontonasal dysostosis,"NEURODEVELOPMENTAL DISORDER WITH MOVEMENT ABNORMALITIES, ABNORMAL GAIT, AND AUTISTIC FEATURES"
phenomexcan_traits,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20096_1-Size_of_red_wine_glass_drunk_small_125ml,0.651161,0.247607,3.820126,1.075217,0.009677,0.162067,1.670074,1.670074,0.273777,0.273777,...,0.292204,1.695379,0.355525,1.101905,0.37941,1.747519,0.27292,0.006041,0.194577,0.194577
2345-Ever_had_bowel_cancer_screening,0.544953,0.055165,0.373364,0.667099,1.731603,5.829613,0.000311,0.000311,0.872317,0.872317,...,0.871863,0.772939,0.120311,0.015428,0.332303,1.911665,2.069227,0.242165,0.392449,0.392449
N49-Diagnoses_main_ICD10_N49_Inflammatory_disorders_of_male_genital_organs_not_elsewhere_classified,2.352883,3.096874,0.208791,1.486795,0.166468,0.268138,2.304044,2.304044,0.007125,0.007125,...,0.02007,0.137331,0.744516,0.810886,0.222463,5.448538,0.087805,0.725028,0.155316,0.155316
100011_raw-Iron,2.905654,0.049563,11.132239,0.978171,0.4289,0.39541,0.199536,0.199536,0.638303,0.638303,...,0.000193,0.32735,0.436058,0.096868,0.173245,3.459105,0.144557,0.009105,0.015222,0.015222
5221-Index_of_best_refractometry_result_right,0.836999,0.071389,0.229481,0.340287,0.546314,2.319034,4.984327,4.984327,0.177313,0.177313,...,2.239274,1.115672,0.141618,0.01261,1.253595,0.031501,0.24487,0.001039,0.028283,0.028283


# Load S-MultiXcan gene associations

In [15]:
# Path of the S-MultiXcan p-value matrix (.pkl.xz) inside the gene-association directory.
# The file name has no placeholders, so a plain string literal is used instead of an f-string.
smultixcan_pvalues_file = os.path.join(conf.GENE_ASSOC_DIR, 'smultixcan-mashr-pvalues.pkl.xz')
display(smultixcan_pvalues_file)

'/mnt/phenomexcan_base/gene_assoc/smultixcan-mashr-pvalues.pkl.xz'

In [16]:
# NOTE(review): unpickling can execute arbitrary code; only load files produced by a trusted pipeline.
smultixcan_gene_associations = pd.read_pickle(smultixcan_pvalues_file)

In [17]:
# Dimensions of the p-value matrix: rows are genes, columns are traits.
smultixcan_gene_associations.shape

(22515, 4091)

In [18]:
# Preview the first rows; the index holds unversioned Ensembl gene ids.
smultixcan_gene_associations.head(5)

Unnamed: 0_level_0,20096_1-Size_of_red_wine_glass_drunk_small_125ml,2345-Ever_had_bowel_cancer_screening,N49-Diagnoses_main_ICD10_N49_Inflammatory_disorders_of_male_genital_organs_not_elsewhere_classified,100011_raw-Iron,5221-Index_of_best_refractometry_result_right,20003_1141150624-Treatmentmedication_code_zomig_25mg_tablet,S69-Diagnoses_main_ICD10_S69_Other_and_unspecified_injuries_of_wrist_and_hand,20024_1136-Job_code_deduced_Information_and_communication_technology_managers,20002_1385-Noncancer_illness_code_selfreported_allergy_or_anaphylactic_reaction_to_food,G6_SLEEPAPNO-Sleep_apnoea,...,Astle_et_al_2016_Sum_basophil_neutrophil_counts,RA_OKADA_TRANS_ETHNIC,pgc.scz2,PGC_ADHD_EUR_2017,MAGIC_FastingGlucose,Astle_et_al_2016_Red_blood_cell_count,SSGAC_Depressive_Symptoms,BCAC_ER_positive_BreastCancer_EUR,IBD.EUR.Inflammatory_Bowel_Disease,Astle_et_al_2016_High_light_scatter_reticulocyte_count
gene_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSG00000000419,0.865429,0.918314,0.810683,0.374671,0.189032,0.140981,0.467741,0.129427,0.19368,0.285479,...,0.41621,0.782554,0.609467,0.980281,0.666504,0.409761,0.71331,0.168319,0.460244,0.765506
ENSG00000000457,0.174192,0.064765,0.889194,0.896938,0.448596,0.269602,0.540261,0.068405,0.041813,0.313427,...,0.14936,0.512603,0.010907,0.228982,0.607081,0.812484,0.678749,0.918971,0.311187,0.344574
ENSG00000000460,0.879969,0.240715,0.238228,0.567555,0.92132,0.825036,0.78223,0.644525,0.392273,0.840014,...,0.50352,0.764147,0.587969,0.30146,0.629621,0.486664,0.736509,0.9336,0.000477,0.321223
ENSG00000000938,0.19267,0.400054,0.114353,0.4707,0.889202,1.1e-05,0.899764,0.212352,0.829671,0.372348,...,0.899212,0.961678,0.059247,0.588855,0.898525,0.135045,0.954998,0.08822,0.176497,0.304281
ENSG00000000971,0.180632,0.79306,0.490585,0.088752,0.744531,0.949639,0.253817,0.377408,0.971655,0.070266,...,0.390618,0.093824,0.020391,0.109883,0.870551,0.99545,0.00266,0.421588,0.656851,0.868416


# Load ClinVar gene associations

In [19]:
# Path of the ClinVar gene/disease association matrix (.pkl.xz) inside the gene-association directory.
clinvar_genes_associations_filename = os.path.join(conf.GENE_ASSOC_DIR, 'clinvar-gene_associations.pkl.xz')
display(clinvar_genes_associations_filename)

'/mnt/phenomexcan_base/gene_assoc/clinvar-gene_associations.pkl.xz'

In [20]:
# NOTE(review): unpickling can execute arbitrary code; only load files produced by a trusted pipeline.
clinvar_genes_associations = pd.read_pickle(clinvar_genes_associations_filename)

In [21]:
# Dimensions of the ClinVar matrix as loaded: rows are gene symbols, columns are diseases.
clinvar_genes_associations.shape

(4194, 5586)

In [22]:
# Preview the first rows; the cells shown are 0/1 flags per (gene, disease) pair.
clinvar_genes_associations.head(5)

Unnamed: 0_level_0,Alzheimer's disease,Alpha-2-macroglobulin deficiency,"Otitis media, susceptibility to",p phenotype,Glucocorticoid deficiency with achalasia,Keratosis palmoplantaris papulosa,"Charcot-Marie-Tooth disease, type 2N","Epileptic encephalopathy, early infantile, 29",Combined oxidative phosphorylation deficiency 8,"Leukoencephalopathy, progressive, with ovarian failure",...,"Myopia 21, autosomal dominant",Paget disease of bone 6,ZNF711-Related X-linked Mental Retardation,Seborrhea-like dermatitis with psoriasiform elements,PEHO syndrome,Oocyte maturation defect 1,OOCYTE MATURATION DEFECT 6,OOCYTE MATURATION DEFECT 3,Acromelic frontonasal dysostosis,"NEURODEVELOPMENTAL DISORDER WITH MOVEMENT ABNORMALITIES, ABNORMAL GAIT, AND AUTISTIC FEATURES"
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A2M,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A2ML1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
A4GALT,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAAS,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AAGAB,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Compute unique list of genes and non-empty diseases

In [23]:
# Translate ClinVar gene symbols to gene ids, skipping symbols that the
# project's name -> id map does not know about.
clinvar_genes_to_id = pd.Index([
    metadata.GENE_NAME_TO_ID_MAP[gene_symbol]
    for gene_symbol in clinvar_genes_associations.index
    if gene_symbol in metadata.GENE_NAME_TO_ID_MAP
])
display(len(clinvar_genes_to_id))

3790

In [24]:
# Keep only the gene ids that are present in both ClinVar and the S-MultiXcan results.
shared_gene_ids = clinvar_genes_to_id.intersection(smultixcan_gene_associations.index)
display(len(shared_gene_ids))

3789

In [25]:
# Gene symbols for the shared ids, in the same order as shared_gene_ids.
shared_gene_names = pd.Index([
    metadata.GENE_ID_TO_NAME_MAP[gene_id]
    for gene_id in shared_gene_ids
])
display(len(shared_gene_names))

3789

In [26]:
# The id and symbol lists must stay aligned one-to-one.
assert len(shared_gene_ids) == len(shared_gene_names)

In [27]:
# Restrict both matrices to the shared genes (S-MultiXcan by id, ClinVar by symbol).
# NOTE(review): both names are overwritten, so this cell is not idempotent —
# re-running it after the ClinVar index has been renamed to ids will fail.
smultixcan_gene_associations = smultixcan_gene_associations.loc[shared_gene_ids]
clinvar_genes_associations = clinvar_genes_associations.loc[shared_gene_names]

In [28]:
# Count associated genes per disease; a zero count means the disease has no
# gene left among the rows currently kept.
_genes_per_disease = clinvar_genes_associations.sum()
_clinvar_diseases_to_remove = _genes_per_disease.loc[_genes_per_disease.eq(0)].index
display(_clinvar_diseases_to_remove.shape)

(480,)

In [29]:
# Drop disease columns that have no associated gene once only the shared
# genes are kept.
clinvar_genes_associations = clinvar_genes_associations.drop(
    _clinvar_diseases_to_remove, axis='columns'
)

In [30]:
# Every remaining disease column must have at least one associated gene.
_genes_per_disease = clinvar_genes_associations.sum()
assert not _genes_per_disease.eq(0).any()

In [31]:
# Both matrices should now have the same number of rows (the shared genes).
display(smultixcan_gene_associations.shape)
display(clinvar_genes_associations.shape)

(3789, 4091)

(3789, 5106)

# ClinVar: rename genes

In [32]:
# Re-key the ClinVar rows from gene symbol to versioned Ensembl gene id.
# NOTE(review): rename() leaves labels that are missing from the mapping unchanged — confirm every symbol is covered.
clinvar_genes_associations = clinvar_genes_associations.rename(index=gene_name_to_id_long)

In [33]:
# Renaming the index must not change the dimensions.
clinvar_genes_associations.shape

(3789, 5106)

In [34]:
# Preview: the row labels should now be versioned Ensembl ids.
clinvar_genes_associations.head()

Unnamed: 0,Alzheimer's disease,Alpha-2-macroglobulin deficiency,"Otitis media, susceptibility to",p phenotype,Glucocorticoid deficiency with achalasia,Keratosis palmoplantaris papulosa,"Charcot-Marie-Tooth disease, type 2N","Epileptic encephalopathy, early infantile, 29",Combined oxidative phosphorylation deficiency 8,"Leukoencephalopathy, progressive, with ovarian failure",...,Retinitis pigmentosa 58,"Myopia 21, autosomal dominant",Paget disease of bone 6,Seborrhea-like dermatitis with psoriasiform elements,PEHO syndrome,Oocyte maturation defect 1,OOCYTE MATURATION DEFECT 6,OOCYTE MATURATION DEFECT 3,Acromelic frontonasal dysostosis,"NEURODEVELOPMENTAL DISORDER WITH MOVEMENT ABNORMALITIES, ABNORMAL GAIT, AND AUTISTIC FEATURES"
ENSG00000175899.14,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000166535.19,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000128274.15,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000094914.12,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000103591.12,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
# Duplicate gene ids or disease names would break the reshaping done next.
assert clinvar_genes_associations.index.is_unique
assert clinvar_genes_associations.columns.is_unique

# ClinVar genes per trait

In [36]:
# Reshape the wide matrix to long form: one row per (disease, gene) pair with its flag in 'associated'.
clinvar_genes = clinvar_genes_associations.unstack().to_frame('associated')

In [37]:
# Row count of the long-form table.
clinvar_genes.shape

(19346634, 1)

In [38]:
# The long form must contain exactly one row per cell of the wide matrix.
assert clinvar_genes.shape[0] == clinvar_genes_associations.size

In [39]:
# Preview the long-form table (index levels are still unnamed here).
clinvar_genes.head()

Unnamed: 0,Unnamed: 1,associated
Alzheimer's disease,ENSG00000175899.14,1
Alzheimer's disease,ENSG00000166535.19,0
Alzheimer's disease,ENSG00000128274.15,0
Alzheimer's disease,ENSG00000094914.12,0
Alzheimer's disease,ENSG00000103591.12,0


In [40]:
# Keep only the associated (disease, gene) pairs and turn the two unnamed index
# levels into regular columns; the flag column is constant afterwards, so drop it.
clinvar_genes = (
    clinvar_genes.loc[clinvar_genes['associated'] == 1]
    .reset_index()
    .rename(columns={'level_0': 'clinvar_trait', 'level_1': 'gene'})
    .drop(columns=['associated'])
)

In [41]:
# Number of associated (disease, gene) pairs.
clinvar_genes.shape

(5921, 2)

In [42]:
# Preview the associated pairs.
clinvar_genes.head()

Unnamed: 0,clinvar_trait,gene
0,Alzheimer's disease,ENSG00000175899.14
1,Alzheimer's disease,ENSG00000142192.20
2,Alzheimer's disease,ENSG00000010704.18
3,Alzheimer's disease,ENSG00000005381.7
4,Alzheimer's disease,ENSG00000164867.10


In [43]:
# Attach the gene symbol for each versioned id; an unknown id raises KeyError.
clinvar_genes = clinvar_genes.assign(
    gene_name=clinvar_genes['gene'].map(lambda gene_id: gene_id_long_to_name[gene_id]),
)

In [44]:
# Attach the band for each versioned id; an unknown id raises KeyError.
clinvar_genes = clinvar_genes.assign(
    gene_band=clinvar_genes['gene'].map(lambda gene_id: gene_id_long_to_band[gene_id]),
)

In [45]:
# Preview the pairs with their symbol and band annotations.
clinvar_genes.head()

Unnamed: 0,clinvar_trait,gene,gene_name,gene_band
0,Alzheimer's disease,ENSG00000175899.14,A2M,12p13.31
1,Alzheimer's disease,ENSG00000142192.20,APP,21q21.3
2,Alzheimer's disease,ENSG00000010704.18,HFE,6p22.2
3,Alzheimer's disease,ENSG00000005381.7,MPO,17q22
4,Alzheimer's disease,ENSG00000164867.10,NOS3,7q36.1


In [46]:
# No (trait, gene, name, band) row may appear more than once.
assert not clinvar_genes.duplicated().any()

In [47]:
def _format_gene(x):
    return pd.Series([
        ', '.join(x.gene.values),
        ', '.join([f'{g} ({b})' for g, b in zip(x.gene_name.values, x.gene_band.values)]),
    ], index=['gene_ids', 'gene_names'])

In [48]:
# Collapse to one row per ClinVar trait holding its joined gene ids and 'NAME (band)' labels.
clinvar_genes_grp = clinvar_genes.groupby('clinvar_trait').apply(_format_gene)

In [49]:
# One row per ClinVar trait, two summary columns.
clinvar_genes_grp.shape

(5106, 2)

In [50]:
# Preview the per-trait gene summaries.
clinvar_genes_grp.head()

Unnamed: 0_level_0,gene_ids,gene_names
clinvar_trait,Unnamed: 1_level_1,Unnamed: 2_level_1
15q13.3 microdeletion syndrome,"ENSG00000175344.17, ENSG00000169926.10","CHRNA7 (15q13.3), KLF13 (15q13.3)"
1q21.1 recurrent microdeletion,"ENSG00000265107.2, ENSG00000121634.5","GJA5 (1q21.2), GJA8 (1q21.2)"
"2,4-Dienoyl-CoA reductase deficiency",ENSG00000152620.12,NADK2 (5p13.2)
2-aminoadipic 2-oxoadipic aciduria,ENSG00000181192.11,DHTKD1 (10p14)
22q13.3 deletion syndrome,ENSG00000251322.7,SHANK3 (22q13.33)


In [51]:
# Each ClinVar trait must appear exactly once, since a trait -> id lookup is built from it.
assert clinvar_genes_grp.index.is_unique

### Save

In [52]:
# Build the trait table: the fresh 0..n-1 index created by reset_index() serves
# as the numeric ClinVar trait id, so label it 'id'.
clinvar_traits = (
    clinvar_genes_grp[['gene_names']]
    .reset_index()
    .rename_axis(index='id')
)

In [53]:
# Rename the gene summary column for export.
# NOTE(review): the mapping used to also contain 'clinvar_traits' -> 'description',
# but no column has that name (the column is 'clinvar_trait'), so pandas silently
# ignored it. The dead entry is removed; the trait column keeps its name because
# the trait -> id lookup built later in this notebook selects 'clinvar_trait'.
# If a 'description' header is wanted in the exported file, that lookup must be
# updated at the same time.
clinvar_traits = clinvar_traits.rename(columns={
    'gene_names': 'genes_info',
})

In [54]:
# Inspect the trait table right before it is written to disk.
display(clinvar_traits.shape)
display(clinvar_traits.head())

(5106, 2)

Unnamed: 0_level_0,clinvar_trait,genes_info
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,15q13.3 microdeletion syndrome,"CHRNA7 (15q13.3), KLF13 (15q13.3)"
1,1q21.1 recurrent microdeletion,"GJA5 (1q21.2), GJA8 (1q21.2)"
2,"2,4-Dienoyl-CoA reductase deficiency",NADK2 (5p13.2)
3,2-aminoadipic 2-oxoadipic aciduria,DHTKD1 (10p14)
4,22q13.3 deletion syndrome,SHANK3 (22q13.33)


In [55]:
# Destination of the ClinVar trait table inside the web-app directory.
output_file = os.path.join(conf.WEBAPP_DIR, 'ukb_clinvar-clinvar_traits.tsv')
display(output_file)

'/mnt/phenomexcan_base/webapp/ukb_clinvar-clinvar_traits.tsv'

In [56]:
# Write the trait table as tab-separated text; the 'id' index is written as the first column.
clinvar_traits.to_csv(output_file, sep='\t')

In [57]:
# Lookup: ClinVar trait name -> numeric id (the 'id' index of the trait table).
clinvar_traits_to_id = (
    clinvar_traits.reset_index()
    .set_index('clinvar_trait')['id']
    .to_dict()
)

In [58]:
# Spot-check the trait -> id lookup with a known trait name.
clinvar_traits_to_id['2-aminoadipic 2-oxoadipic aciduria']

3

# Load phenotype information

In [59]:
# Show the path of the phenotype information file that the next cell reads.
os.path.join(conf.DELIVERABLES_DIR, 'phenotypes_info.tsv.gz')

'/mnt/phenomexcan_base/deliverables/phenotypes_info.tsv.gz'

In [60]:
# Load the phenotype table from the tab-separated .tsv.gz deliverable.
pheno_info = pd.read_csv(os.path.join(conf.DELIVERABLES_DIR, 'phenotypes_info.tsv.gz'), sep='\t')

In [61]:
# Dimensions of the phenotype table (one row per phenotype).
pheno_info.shape

(4091, 10)

In [62]:
# Preview the phenotype table.
pheno_info.head()

Unnamed: 0,pheno_id,full_code,short_code,description,unique_description,type,n,n_cases,n_controls,source
0,0,100001_raw-Food_weight,100001_raw,Food weight,Food weight,continuous_raw,51453,,,UK Biobank
1,1,100002_raw-Energy,100002_raw,Energy,Energy,continuous_raw,51453,,,UK Biobank
2,2,100003_raw-Protein,100003_raw,Protein,Protein,continuous_raw,51453,,,UK Biobank
3,3,100004_raw-Fat,100004_raw,Fat,Fat,continuous_raw,51453,,,UK Biobank
4,4,100005_raw-Carbohydrate,100005_raw,Carbohydrate,Carbohydrate,continuous_raw,51453,,,UK Biobank


In [63]:
# Lookup: phenotype unique description -> numeric phenotype id.
pheno_desc_to_id = pheno_info.set_index('unique_description')['pheno_id'].to_dict()

In [64]:
# Spot-check the description -> id lookup.
pheno_desc_to_id['Fat']

3

In [65]:
# Lookup: phenotype full code -> numeric phenotype id.
pheno_full_code_to_id = pheno_info.set_index('full_code')['pheno_id'].to_dict()

In [66]:
# Spot-check the full code -> id lookup.
pheno_full_code_to_id['100001_raw-Food_weight']

0

### Obtain z2

In [67]:
# Replace trait labels with numeric ids on both axes, then reshape to long form.
# Transposing before unstack() makes the original row label (UKB phenotype) the
# outer index level and the original column label (ClinVar trait) the inner one.
# NOTE(review): rename() leaves labels missing from the mappings unchanged — confirm both lookups cover every label.
clinvar_unstacked = ukb_clinvar_z2.rename(
    index=pheno_full_code_to_id,
    columns=clinvar_traits_to_id,
).T.unstack().to_frame('z2_avg')

In [68]:
# Each (UKB phenotype, ClinVar trait) pair must appear exactly once.
assert clinvar_unstacked.index.is_unique

In [69]:
# The long form must contain exactly one row per cell of the z2 matrix.
assert ukb_clinvar_z2.size == clinvar_unstacked.shape[0]

In [70]:
# Name the two index levels: outer = UKB phenotype id, inner = ClinVar trait id.
clinvar_unstacked = clinvar_unstacked.rename_axis(
    index=['ukb_pheno_id', 'clinvar_pheno_id'],
)

In [71]:
# Sort by (ukb_pheno_id, clinvar_pheno_id) so rows come out in id order.
clinvar_unstacked = clinvar_unstacked.sort_index()

In [72]:
# Add the square root of the z2 average as a separate column.
clinvar_unstacked = clinvar_unstacked.assign(
    sqrt_z2_avg=lambda frame: np.sqrt(frame['z2_avg']),
)

In [73]:
# Re-check pair uniqueness after the index was renamed and sorted.
assert clinvar_unstacked.index.is_unique

In [74]:
# Dimensions of the long-form table (one row per phenotype/trait pair).
clinvar_unstacked.shape

(20888646, 2)

In [75]:
# Preview the long-form table with both value columns.
clinvar_unstacked.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,z2_avg,sqrt_z2_avg
ukb_pheno_id,clinvar_pheno_id,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,1.208743,1.099429
0,1,2.881383,1.697464
0,2,0.106421,0.326222
0,3,0.010371,0.101839
0,4,2.171773,1.473694


# Some testing

In [76]:
# Numeric id of the UKB phenotype used in the manual check in the following cells.
pheno_desc_to_id['Depression possibly related to childbirth']

1341

In [77]:
# Numeric id of the ClinVar trait used in the manual check in the following cells.
clinvar_traits_to_id['Keratosis palmoplantaris papulosa']

2707

In [78]:
# Row for the pair looked up in the two previous cells.
# NOTE(review): the ids are hard-coded from those outputs and will go stale if the id assignment changes.
clinvar_unstacked.loc[1341, 2707]

z2_avg         0.080904
sqrt_z2_avg    0.284436
Name: (1341, 2707), dtype: float64

In [79]:
# Regression check: the stored value for this (phenotype, trait) pair must equal
# the square root of its known z2 average.
# np.isclose replaces exact float equality, which would fail on harmless
# last-bit differences in the upstream data; the tuple key plus column label
# selects the scalar unambiguously on the two-level index.
assert np.isclose(
    clinvar_unstacked.loc[
        (
            pheno_desc_to_id['Other epidermal thickening'],
            clinvar_traits_to_id["Alzheimer's disease"],
        ),
        'sqrt_z2_avg',
    ],
    np.sqrt(1.3472206237157518),
    rtol=1e-9,
    atol=0.0,
)

In [80]:
# Regression check: the stored value for this (phenotype, trait) pair must equal
# the square root of its known z2 average.
# np.isclose replaces exact float equality, which would fail on harmless
# last-bit differences in the upstream data; the tuple key plus column label
# selects the scalar unambiguously on the two-level index.
assert np.isclose(
    clinvar_unstacked.loc[
        (
            pheno_desc_to_id['Depression possibly related to childbirth'],
            clinvar_traits_to_id["Keratosis palmoplantaris papulosa"],
        ),
        'sqrt_z2_avg',
    ],
    np.sqrt(0.08090372843310314),
    rtol=1e-9,
    atol=0.0,
)

In [81]:
# Preview pairs whose sqrt_z2_avg exceeds 6.
clinvar_unstacked[clinvar_unstacked['sqrt_z2_avg'] > 6].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,z2_avg,sqrt_z2_avg
ukb_pheno_id,clinvar_pheno_id,Unnamed: 2_level_1,Unnamed: 3_level_1
80,143,41.145911,6.414508
80,150,46.599677,6.826396
80,154,46.352945,6.8083
80,161,304.541549,17.451119
80,181,51.136357,7.150969


#### Save all results

In [82]:
# Keep only the square-root column for export; z2_avg is no longer available after this cell.
clinvar_unstacked = clinvar_unstacked[['sqrt_z2_avg']]

In [83]:
# Inspect the table right before it is written to disk.
display(clinvar_unstacked.shape)
display(clinvar_unstacked.head())

(20888646, 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,sqrt_z2_avg
ukb_pheno_id,clinvar_pheno_id,Unnamed: 2_level_1
0,0,1.099429
0,1,1.697464
0,2,0.326222
0,3,0.101839
0,4,1.473694


In [84]:
# Destination of the long-form UKB x ClinVar table.
# NOTE(review): this goes to GENE_ASSOC_DIR, whereas the ClinVar trait table is written to WEBAPP_DIR — confirm the directory is intended.
clinvar_unstacked_filename = os.path.join(conf.GENE_ASSOC_DIR, 'ukb_clinvar.tsv')
display(clinvar_unstacked_filename)

'/mnt/phenomexcan_base/gene_assoc/ukb_clinvar.tsv'

In [85]:
# Write the table as tab-separated text; both index levels are written as leading columns.
clinvar_unstacked.to_csv(clinvar_unstacked_filename, sep='\t')