In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle

import numpy as np
import pandas as pd
# Widen pandas' display limits so large result tables are shown in full.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

import settings as conf
import metadata

# Load S-MultiXcan/fastENLOC combined results

In [3]:
# Path of the combined results file inside the configured gene-association directory.
all_results_filename = os.path.join(
    conf.GENE_ASSOC_DIR,
    'all_smultixcan_and_fastenloc_results.pkl.xz',
)
display(all_results_filename)

# NOTE(review): unpickling can execute arbitrary code; only load this file
# from a trusted location.
all_results = pd.read_pickle(all_results_filename)

'/mnt/phenomexcan_base/gene_assoc/all_smultixcan_and_fastenloc_results.pkl.xz'

# All traits

In [4]:
# Distinct trait identifiers present in the results, wrapped in a Series so
# the `.str` accessor can be used for name searches further down.
all_traits = pd.Series(all_results['trait'].unique())

In [5]:
# Peek at the first few trait identifiers.
all_traits.head()

0     20096_1-Size_of_red_wine_glass_drunk_small_125ml
1                 2345-Ever_had_bowel_cancer_screening
2    N49-Diagnoses_main_ICD10_N49_Inflammatory_diso...
3                                      100011_raw-Iron
4        5221-Index_of_best_refractometry_result_right
dtype: category
Categories (4091, object): [20096_1-Size_of_red_wine_glass_drunk_small_125ml, 2345-Ever_had_bowel_cancer_screening, N49-Diagnoses_main_ICD10_N49_Inflammatory_diso..., 100011_raw-Iron, ..., SSGAC_Depressive_Symptoms, BCAC_ER_positive_BreastCancer_EUR, IBD.EUR.Inflammatory_Bowel_Disease, Astle_et_al_2016_High_light_scatter_reticulocy...]

# Morning/evening person (chronotype)

Goal: for the morning/evening person (chronotype) trait, list the top 15 genes, plus the following genes of interest: CLOCK, BMAL1/ARNTL, PER1, PER2, PER3, CRY1, CRY2 and NPAS2.

In [6]:
# Significance cutoff applied to S-MultiXcan p-values in the filters below.
# NOTE(review): presumably a Bonferroni correction over all gene-trait pairs — confirm.
PVALUE_THRESHOLD = 5.49e-10

In [7]:
# Echo the threshold so its value is recorded in the notebook output.
PVALUE_THRESHOLD

5.49e-10

In [8]:
# Case-insensitive search for every trait whose name mentions "chronotype".
_name_matches = all_traits.str.lower().str.contains('chronotype')
all_traits[_name_matches].tolist()

['1180-Morningevening_person_chronotype', 'Jones_et_al_2016_Chronotype']

## Top 15 genes

In [9]:
# Trait analysed in this subsection (one of the two chronotype matches listed above).
_pheno = '1180-Morningevening_person_chronotype'

In [10]:
# Keep the trait's gene associations that pass both cutoffs:
# fastENLOC RCP of at least 0.10 and S-MultiXcan p-value below PVALUE_THRESHOLD.
_is_pheno = all_results['trait'] == _pheno
_is_colocalized = all_results['fastenloc_rcp'] >= 0.10
_is_significant = all_results['smultixcan_pvalue'] < PVALUE_THRESHOLD

_res = all_results[_is_pheno & _is_colocalized & _is_significant]

In [11]:
# (rows, columns) of the gene associations that passed both cutoffs.
_res.shape

(20, 5)

In [12]:
# Rank by S-MultiXcan significance (smallest p-value first) and show the top 15.
(
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=15)
)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
37138018,1180-Morningevening_person_chronotype,ENSG00000204104,TRAF3IP1,1.7244589999999998e-20,0.404
37134050,1180-Morningevening_person_chronotype,ENSG00000170667,RASA4B,1.6601689999999998e-19,0.6343
37124541,1180-Morningevening_person_chronotype,ENSG00000102805,CLN5,5.248423e-18,0.3366
37122694,1180-Morningevening_person_chronotype,ENSG00000049245,VAMP3,7.317229e-18,0.6317
37130379,1180-Morningevening_person_chronotype,ENSG00000146469,VIP,1.812497e-17,0.25849
37122188,1180-Morningevening_person_chronotype,ENSG00000005812,FBXL3,1.544816e-16,0.348645
37141075,1180-Morningevening_person_chronotype,ENSG00000249484,LINC01470,4.161078e-16,0.104014
37124219,1180-Morningevening_person_chronotype,ENSG00000100354,TNRC6B,8.441499e-14,0.2036
37125358,1180-Morningevening_person_chronotype,ENSG00000108551,RASD1,1.24633e-12,0.217511
37130496,1180-Morningevening_person_chronotype,ENSG00000147905,ZCCHC7,4.282318e-11,0.2469


## Other genes

In [13]:
# Both chronotype traits found by the name search above.
# Note: `_pheno` held a single string in the previous subsection and is a list from here on.
_pheno = ['1180-Morningevening_person_chronotype', 'Jones_et_al_2016_Chronotype']

In [14]:
# Gene symbols to look up for the chronotype traits.
_genes = ['CLOCK', 'ARNTL', 'PER1', 'PER2', 'CRY1', 'CRY2', 'PER3', 'NPAS2']
# Show how many genes were requested; the assertion further down relies on this count.
display(len(_genes))

8

In [15]:
# Rows for any of the selected traits combined with any of the selected genes.
_in_traits = all_results['trait'].isin(_pheno)
_in_genes = all_results['gene_name'].isin(_genes)

_res = all_results[_in_traits & _in_genes]

In [16]:
# Every (trait, gene) combination must appear exactly once in `_res`.
# Comparing pairs rather than only the row count catches a missing pair that
# is offset by a duplicated row, and the message names the trait as well as
# the gene that is missing.
_expected_pairs = {(_t, _g) for _t in _pheno for _g in _genes}
_observed_pairs = set(zip(_res['trait'], _res['gene_name']))

_missing_pairs = sorted(_expected_pairs.difference(_observed_pairs))
assert not _missing_pairs, 'Missing (trait, gene) pairs: {}'.format(_missing_pairs)

# keep=False flags every copy of a duplicated pair so all offending rows are shown.
_dup_rows = _res[_res.duplicated(['trait', 'gene_name'], keep=False)]
assert _dup_rows.empty, 'Duplicated (trait, gene) rows:\n{}'.format(_dup_rows)

# Original row-count check, kept as the final guard.
assert _res.shape[0] == len(_genes) * len(_pheno), set(_genes).difference(set(_res['gene_name']))

In [17]:
# Expect len(_genes) * len(_pheno) rows, as asserted in the previous cell.
_res.shape

(16, 5)

In [18]:
# Group rows by trait, then order each trait's genes by S-MultiXcan p-value.
_res.sort_values(by=['trait', 'smultixcan_pvalue'], ascending=True)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
37122695,1180-Morningevening_person_chronotype,ENSG00000049246,PER3,1.6510740000000002e-17,0.08005
37128165,1180-Morningevening_person_chronotype,ENSG00000132326,PER2,1.138651e-09,0.01877
37122291,1180-Morningevening_person_chronotype,ENSG00000008405,CRY1,0.0005632047,0.03028
37134012,1180-Morningevening_person_chronotype,ENSG00000170485,NPAS2,0.001237491,0.1191
37128378,1180-Morningevening_person_chronotype,ENSG00000133794,ARNTL,0.004350487,0.015728
37128547,1180-Morningevening_person_chronotype,ENSG00000134852,CLOCK,0.08421765,0.008884
37126944,1180-Morningevening_person_chronotype,ENSG00000121671,CRY2,0.1055408,0.002016
37135440,1180-Morningevening_person_chronotype,ENSG00000179094,PER1,0.6909949,0.242
91682283,Jones_et_al_2016_Chronotype,ENSG00000049246,PER3,1.070639e-07,0.013
91687753,Jones_et_al_2016_Chronotype,ENSG00000132326,PER2,0.0002175813,0.0


# TPO -> Top 10 traits

In [19]:
# Gene symbol queried in this section.
_gene = 'TPO'

In [20]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [21]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [22]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
11778979,20002_1226-Noncancer_illness_code_selfreported...,ENSG00000115705,TPO,1.406237e-14,0.9986
38901476,20003_1141191044-Treatmentmedication_code_levo...,ENSG00000115705,TPO,1.539319e-10,0.996852
67731769,20003_1141157264-Treatmentmedication_code_salm...,ENSG00000115705,TPO,7.454061e-05,0.0
4251194,400_raw-Time_to_complete_round,ENSG00000115705,TPO,0.0001190759,0.044376
19396648,20126_5-Bipolar_and_major_depression_status_Si...,ENSG00000115705,TPO,0.000248197,0.0
70922651,1349-Processed_meat_intake,ENSG00000115705,TPO,0.000532701,0.039486
17464142,20003_1140879778-Treatmentmedication_code_doxa...,ENSG00000115705,TPO,0.0008807842,0.019986
38968889,2139_raw-Age_first_had_sexual_intercourse,ENSG00000115705,TPO,0.00107821,0.06187
54586234,5085_raw-Spherical_power_left,ENSG00000115705,TPO,0.001268166,0.07147
53957046,S05-Diagnoses_main_ICD10_S05_Injury_of_eye_and...,ENSG00000115705,TPO,0.001326789,0.0


In [23]:
# Full trait names; the table above shows long names truncated.
_tmp['trait'].tolist()

['20002_1226-Noncancer_illness_code_selfreported_hypothyroidismmyxoedema',
 '20003_1141191044-Treatmentmedication_code_levothyroxine_sodium',
 '20003_1141157264-Treatmentmedication_code_salmeterol_product',
 '400_raw-Time_to_complete_round',
 '20126_5-Bipolar_and_major_depression_status_Single_Probable_major_depression_episode',
 '1349-Processed_meat_intake',
 '20003_1140879778-Treatmentmedication_code_doxazosin',
 '2139_raw-Age_first_had_sexual_intercourse',
 '5085_raw-Spherical_power_left',
 'S05-Diagnoses_main_ICD10_S05_Injury_of_eye_and_orbit']

# PSMD3 -> Top 10 traits

In [24]:
# Gene symbol queried in this section.
_gene = 'PSMD3'

In [25]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000108344'

In [26]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [27]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [28]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
20474314,30140_raw-Neutrophill_count,ENSG00000108344,PSMD3,1e-320,0.3783
29282946,30200_raw-Neutrophill_percentage,ENSG00000108344,PSMD3,4.405596e-263,0.4322
91707384,Astle_et_al_2016_Sum_basophil_neutrophil_counts,ENSG00000108344,PSMD3,4.669068e-261,0.598
91482674,Astle_et_al_2016_Granulocyte_count,ENSG00000108344,PSMD3,1.297851e-259,0.606
42181300,30000_raw-White_blood_cell_leukocyte_count,ENSG00000108344,PSMD3,3.999405e-258,0.4183
91302906,Astle_et_al_2016_Neutrophil_count,ENSG00000108344,PSMD3,3.946031e-257,0.605
91033254,Astle_et_al_2016_Sum_neutrophil_eosinophil_counts,ENSG00000108344,PSMD3,3.669955e-256,0.593
91460203,Astle_et_al_2016_Myeloid_white_cell_count,ENSG00000108344,PSMD3,3.529369e-244,0.595
90156885,30180_raw-Lymphocyte_percentage,ENSG00000108344,PSMD3,1.018281e-215,0.2339
91347848,Astle_et_al_2016_White_blood_cell_count,ENSG00000108344,PSMD3,7.790088e-203,0.609


# ZFP36L2 -> Top 10 traits

In [29]:
# Gene symbol queried in this section.
_gene = 'ZFP36L2'

In [30]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000152518'

In [31]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [32]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [33]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
90162545,30180_raw-Lymphocyte_percentage,ENSG00000152518,ZFP36L2,3.6171950000000003e-66,0.4014
29288606,30200_raw-Neutrophill_percentage,ENSG00000152518,ZFP36L2,1.165641e-61,0.3872
63601823,30080_raw-Platelet_count,ENSG00000152518,ZFP36L2,6.876903999999999e-51,0.063481
61691788,30090_raw-Platelet_crit,ENSG00000152518,ZFP36L2,2.051007e-43,0.04183
91533276,Astle_et_al_2016_Platelet_count,ENSG00000152518,ZFP36L2,2.2218800000000002e-29,0.891
70500420,30260_raw-Mean_reticulocyte_volume,ENSG00000152518,ZFP36L2,4.644179e-24,0.051511
91151269,Astle_et_al_2016_Lymphocyte_counts,ENSG00000152518,ZFP36L2,1.026523e-22,0.945
26569615,30050_raw-Mean_corpuscular_haemoglobin,ENSG00000152518,ZFP36L2,1.666844e-20,0.3489
90117603,30040_raw-Mean_corpuscular_volume,ENSG00000152518,ZFP36L2,4.525347e-20,0.1767
20479974,30140_raw-Neutrophill_count,ENSG00000152518,ZFP36L2,5.996725e-15,0.398946


# FBXO7 -> Top 10 traits

In [34]:
# Gene symbol queried in this section.
_gene = 'FBXO7'

In [35]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000100225'

In [36]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [37]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [38]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
90110782,30040_raw-Mean_corpuscular_volume,ENSG00000100225,FBXO7,5.907129e-116,1.0812
70493599,30260_raw-Mean_reticulocyte_volume,ENSG00000100225,FBXO7,1.9187259999999999e-91,0.88315
26562794,30050_raw-Mean_corpuscular_haemoglobin,ENSG00000100225,FBXO7,2.971476e-91,0.7975
26068432,30270_raw-Mean_sphered_cell_volume,ENSG00000100225,FBXO7,9.591552e-73,0.8747
65572450,30010_raw-Red_blood_cell_erythrocyte_count,ENSG00000100225,FBXO7,1.758737e-31,0.5081
67774608,30070_raw-Red_blood_cell_erythrocyte_distribut...,ENSG00000100225,FBXO7,4.5332680000000006e-17,0.8025
91818578,Astle_et_al_2016_Red_blood_cell_count,ENSG00000100225,FBXO7,5.952877e-14,0.772
89324297,30280_raw-Immature_reticulocyte_fraction,ENSG00000100225,FBXO7,2.569621e-05,0.985
18900183,2217_raw-Age_started_wearing_glasses_or_contac...,ENSG00000100225,FBXO7,2.945839e-05,0.678506
78358449,30100_raw-Mean_platelet_thrombocyte_volume,ENSG00000100225,FBXO7,6.089061e-05,0.518583


# SYN3 -> Top 10 traits

In [39]:
# Gene symbol queried in this section.
_gene = 'SYN3'

In [40]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000185666'

In [41]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [42]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [43]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
90123090,30040_raw-Mean_corpuscular_volume,ENSG00000185666,SYN3,7.311195e-100,0.5802
26575102,30050_raw-Mean_corpuscular_haemoglobin,ENSG00000185666,SYN3,3.359134e-83,0.6254
70505907,30260_raw-Mean_reticulocyte_volume,ENSG00000185666,SYN3,9.827677e-77,0.7026
26080740,30270_raw-Mean_sphered_cell_volume,ENSG00000185666,SYN3,9.726859e-61,0.2022
65584758,30010_raw-Red_blood_cell_erythrocyte_count,ENSG00000185666,SYN3,1.50294e-26,0.3603
67786916,30070_raw-Red_blood_cell_erythrocyte_distribut...,ENSG00000185666,SYN3,2.7112410000000002e-17,0.7306
91830886,Astle_et_al_2016_Red_blood_cell_count,ENSG00000185666,SYN3,5.51201e-13,0.671
75427056,23120_raw-Arm_fat_mass_right,ENSG00000185666,SYN3,3.898906e-05,0.338057
79988669,23104_raw-Body_mass_index_BMI,ENSG00000185666,SYN3,5.928607e-05,0.07018
77921337,23107_raw-Impedance_of_leg_right,ENSG00000185666,SYN3,6.469147e-05,0.0


# BPIFC -> Top 10 traits

In [51]:
# Gene symbol queried in this section.
_gene = 'BPIFC'

In [52]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000184459'

In [53]:
# All rows for this gene across every trait; no fastENLOC RCP or p-value
# threshold is applied here.
_res = all_results.loc[all_results['gene_name'] == _gene]

In [54]:
# Rows found for this gene; compare with the number of traits to confirm one row per trait.
_res.shape

(4091, 5)

In [55]:
# Ten traits with the smallest S-MultiXcan p-value for this gene.
_tmp = (
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=10)
)
display(_tmp)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
90122860,30040_raw-Mean_corpuscular_volume,ENSG00000184459,BPIFC,1.734341e-32,0.003652
26574872,30050_raw-Mean_corpuscular_haemoglobin,ENSG00000184459,BPIFC,3.754533e-26,0.003926
70505677,30260_raw-Mean_reticulocyte_volume,ENSG00000184459,BPIFC,3.407269e-15,0.004066
26080510,30270_raw-Mean_sphered_cell_volume,ENSG00000184459,BPIFC,4.976305e-15,0.000965
65584528,30010_raw-Red_blood_cell_erythrocyte_count,ENSG00000184459,BPIFC,2.962014e-10,0.008548
67786686,30070_raw-Red_blood_cell_erythrocyte_distribut...,ENSG00000184459,BPIFC,5.981571e-09,0.004214
91650888,GIANT_HEIGHT,ENSG00000184459,BPIFC,5.994401e-05,0.0
11991193,E4_THYTOXGOITDIF-Thyrotoxicosis_with_diffuse_g...,ENSG00000184459,BPIFC,0.0005383346,0.02803
34844200,4100_raw-Ankle_spacing_width_left,ENSG00000184459,BPIFC,0.0005775895,0.127511
68820352,C_DIFFICILE_ENTEROCOLITIS-Enterocolitis_due_to...,ENSG00000184459,BPIFC,0.001021212,0.002691


# PCSK9 + Type 1 diabetes without complications

In [44]:
# Case-insensitive search for every trait whose name mentions "diabetes".
_name_matches = all_traits.str.lower().str.contains('diabetes')
all_traits[_name_matches].tolist()

['E4_DM1OPTH-Type_1_diabetes_with_ophthalmic_complications',
 '6177_2-Medication_for_cholesterol_blood_pressure_or_diabetes_Blood_pressure_medication',
 '6153_5-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Oral_contraceptive_pill_or_minipill',
 '2443-Diabetes_diagnosed_by_doctor',
 '6177_100-Medication_for_cholesterol_blood_pressure_or_diabetes_None_of_the_above',
 '20002_1223-Noncancer_illness_code_selfreported_type_2_diabetes',
 'E4_DM1NOCOMP-Type_1_diabetes_without_complications',
 'E10-Diagnoses_main_ICD10_E10_Insulindependent_diabetes_mellitus',
 '6153_100-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_None_of_the_above',
 '6153_1-Medication_for_cholesterol_blood_pressure_diabetes_or_take_exogenous_hormones_Cholesterol_lowering_medication',
 '2986-Started_insulin_within_one_year_diagnosis_of_diabetes',
 'E11-Diagnoses_main_ICD10_E11_Noninsulindependent_diabetes_mellitus',
 'E4_DM1KETO-Type_1_diabetes_with_ketoacidosis

## PCSK9 association with this trait

In [45]:
# Trait of interest, taken from the diabetes matches listed above.
_pheno = 'E4_DM1NOCOMP-Type_1_diabetes_without_complications'

In [46]:
# Gene symbol to look up for that trait.
_gene = 'PCSK9'

In [47]:
# Show the identifier that the project's name-to-ID map holds for this gene symbol.
metadata.GENE_NAME_TO_ID_MAP[_gene]

'ENSG00000169174'

In [48]:
# Rows for this one trait/gene combination.
_is_pheno = all_results['trait'] == _pheno
_is_gene = all_results['gene_name'] == _gene

_res = all_results[_is_pheno & _is_gene]

In [49]:
# Number of rows matched for the selected trait/gene combination.
_res.shape

(1, 5)

In [50]:
# Show the matching association(s), most significant first (at most 15 rows).
(
    _res
    .sort_values(by='smultixcan_pvalue', ascending=True)
    .head(n=15)
)

Unnamed: 0,trait,gene,gene_name,smultixcan_pvalue,fastenloc_rcp
19853542,E4_DM1NOCOMP-Type_1_diabetes_without_complicat...,ENSG00000169174,PCSK9,0.000822,0.0
