In [1]:
# Enable IPython's autoreload extension; mode 2 reloads modules before each cell
# runs, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pickle

import numpy as np
import pandas as pd

import settings as conf

In [3]:
# Directory that receives the T2D trait tables written at the end of this notebook.
# It is created if missing and reused if it already exists.
output_dir = os.path.join(
    conf.DELIVERABLES_DIR,
    'roc_validation',
    'classifier_tables',
    't2d',
)
os.makedirs(output_dir, exist_ok=True)
display(output_dir)

'/mnt/phenomexcan_base/deliverables/roc_validation/classifier_tables/t2d'

# Define diabetes traits

In [4]:
# Traits used as the type 2 diabetes set in this notebook.
# Each entry has the form '<code>-<description>'; the code part (the text before
# the first '-') is extracted in a later cell to look up phenotype metadata.
# The list order is preserved in the files saved at the end of the notebook.
# NOTE(review): E14 is "Unspecified diabetes mellitus", not specifically type 2 --
# presumably included on purpose; confirm.
diabetes_traits = [
    'E11-Diagnoses_main_ICD10_E11_Noninsulindependent_diabetes_mellitus',
    '20002_1223-Noncancer_illness_code_selfreported_type_2_diabetes',
    'E14-Diagnoses_main_ICD10_E14_Unspecified_diabetes_mellitus',
    'E4_DM2NOCOMP-Type_2_diabetes_without_complications',
    'E4_DM2OPTH-Type_2_diabetes_with_ophthalmic_complications',
    'E4_DM2-Type_2_diabetes',
    'E4_DM2PERIPH-Type_2_diabetes_with_peripheral_circulatory_complications',
]

In [5]:
# Wrap the trait list in a pandas Series (default integer index); this is the
# object that gets pickled and exported further down.
all_t2d_traits = pd.Series(data=diabetes_traits)

In [6]:
# Number of traits held in the Series (one element per entry of diabetes_traits).
all_t2d_traits.shape

(7,)

In [7]:
# Preview the Series; long trait names are abbreviated in this rendering.
all_t2d_traits

0    E11-Diagnoses_main_ICD10_E11_Noninsulindepende...
1    20002_1223-Noncancer_illness_code_selfreported...
2    E14-Diagnoses_main_ICD10_E14_Unspecified_diabe...
3    E4_DM2NOCOMP-Type_2_diabetes_without_complicat...
4    E4_DM2OPTH-Type_2_diabetes_with_ophthalmic_com...
5                               E4_DM2-Type_2_diabetes
6    E4_DM2PERIPH-Type_2_diabetes_with_peripheral_c...
dtype: object

In [8]:
# Show the complete, untruncated trait names as a plain Python list.
all_t2d_traits.tolist()

['E11-Diagnoses_main_ICD10_E11_Noninsulindependent_diabetes_mellitus',
 '20002_1223-Noncancer_illness_code_selfreported_type_2_diabetes',
 'E14-Diagnoses_main_ICD10_E14_Unspecified_diabetes_mellitus',
 'E4_DM2NOCOMP-Type_2_diabetes_without_complications',
 'E4_DM2OPTH-Type_2_diabetes_with_ophthalmic_complications',
 'E4_DM2-Type_2_diabetes',
 'E4_DM2PERIPH-Type_2_diabetes_with_peripheral_circulatory_complications']

# Load phenotype info

In [13]:
import metadata

In [14]:
# Peek at the first rows of the phenotype info table exposed by the metadata module.
metadata.RAPID_GWAS_PHENO_INFO.head()

Unnamed: 0_level_0,description,variable_type,source,n_non_missing,n_missing,n_controls,n_cases,PHESANT_transformation,notes
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
G54,Diagnoses - main ICD10: G54 Nerve root and ple...,categorical,icd10,361194,0,361051.0,143.0,,
C21,Diagnoses - main ICD10: C21 Malignant neoplasm...,categorical,icd10,361194,0,361055.0,139.0,,
F45,Diagnoses - main ICD10: F45 Somatoform disorders,categorical,icd10,361194,0,360953.0,241.0,,
I35,Diagnoses - main ICD10: I35 Nonrheumatic aorti...,categorical,icd10,361194,0,360293.0,901.0,,
D38,Diagnoses - main ICD10: D38 Neoplasm of uncert...,categorical,icd10,361194,0,361087.0,107.0,,


In [15]:
# Short alias for the phenotype info table. This binds the same object (no copy
# is made), so any in-place change to df would also affect
# metadata.RAPID_GWAS_PHENO_INFO.
df = metadata.RAPID_GWAS_PHENO_INFO

In [17]:
# Keep only the code part of each trait name, i.e. the text before the first '-'.
t2d_trait_codes = [
    trait_name.partition('-')[0]
    for trait_name in all_t2d_traits.tolist()
]

In [18]:
# Validate the extracted codes before they are used as row labels into the
# phenotype info table (df). The length comparison alone cannot fail, because the
# codes are built one-per-trait from the same Series, so two real checks follow.
assert len(t2d_trait_codes) == all_t2d_traits.shape[0]

# Two traits collapsing onto the same code would duplicate rows in the lookup.
assert len(set(t2d_trait_codes)) == len(t2d_trait_codes), 'duplicated trait codes'

# Every code must be a row label of df; on failure the message lists the unknown codes.
assert set(t2d_trait_codes) <= set(df.index), sorted(set(t2d_trait_codes) - set(df.index))

In [19]:
# Show the extracted trait codes.
t2d_trait_codes

['E11',
 '20002_1223',
 'E14',
 'E4_DM2NOCOMP',
 'E4_DM2OPTH',
 'E4_DM2',
 'E4_DM2PERIPH']

In [24]:
# Select the phenotype info rows whose labels are the T2D trait codes, in list order.
df.loc[t2d_trait_codes]

Unnamed: 0_level_0,description,variable_type,source,n_non_missing,n_missing,n_controls,n_cases,PHESANT_transformation,notes
phenotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E11,Diagnoses - main ICD10: E11 Non-insulin-depend...,categorical,icd10,361194,0,360489.0,705.0,,
20002_1223,"Non-cancer illness code, self-reported: type 2...",binary,phesant,361141,53,358849.0,2292.0,20002_0 || CAT-MUL-BINARY-VAR 1223 || Indicato...,Code for non-cancer illness. If the participan...
E14,Diagnoses - main ICD10: E14 Unspecified diabet...,categorical,icd10,361194,0,360970.0,224.0,,
E4_DM2NOCOMP,Type 2 diabetes without complications,categorical,finngen,361194,0,360740.0,454.0,,
E4_DM2OPTH,Type 2 diabetes with ophthalmic complications,categorical,finngen,361194,0,360958.0,236.0,,
E4_DM2,Type 2 diabetes,categorical,finngen,361194,0,360306.0,888.0,,
E4_DM2PERIPH,Type 2 diabetes with peripheral circulatory co...,categorical,finngen,361194,0,361067.0,127.0,,


## Save

### Internal

In [9]:
# Destination of the pickled copy of the trait Series, inside output_dir.
t2d_traits_filename = os.path.join(
    output_dir,
    't2d_traits.pkl.xz',
)
display(t2d_traits_filename)

'/mnt/phenomexcan_base/deliverables/roc_validation/classifier_tables/t2d/t2d_traits.pkl.xz'

In [10]:
# Persist the Series as a pickle. No compression is passed explicitly, so pandas'
# default (inferred from the file suffix, here '.xz') applies.
all_t2d_traits.to_pickle(path=t2d_traits_filename)

### Publishable

In [11]:
# Destination of the tab-separated export of the trait list, inside output_dir.
# NOTE(review): this rebinds t2d_traits_filename, which until now held the pickle
# path; re-running the earlier to_pickle cell after this one would write the
# pickle to the .tsv.gz path. A distinct variable name per file would avoid that.
t2d_traits_filename = os.path.join(
    output_dir,
    't2d_traits.tsv.gz',
)
display(t2d_traits_filename)

'/mnt/phenomexcan_base/deliverables/roc_validation/classifier_tables/t2d/t2d_traits.tsv.gz'

In [12]:
# Write one trait name per line: tab separator, no index column, no header row.
# No compression is passed explicitly, so pandas' default (inferred from the
# file suffix, here '.gz') applies.
all_t2d_traits.to_csv(
    t2d_traits_filename,
    sep='\t',
    header=False,
    index=False,
)