In [7]:
import pandas as pd
import scipy.stats
from collections import Counter
from statsmodels.stats.multitest import fdrcorrection

# Correlation result files, one per trait; each one is parsed by return_df().
GEST = 'out_data_fg/cor_full_GEST_DIABETES.txt'
HYPT = 'out_data_fg/cor_full_I9_HYPTENSPREG.txt'
VOMT = 'out_data_fg/cor_full_O15_EXCESS_VOMIT_PREG.txt'
HPRT = 'out_data_fg/cor_full_O15_GESTAT_HYPERT.txt'
# CSV providing the phenotype code (column '0') and its full name (column 'finn_pheno').
FINN_UK = 'data/finn_uk_codes.csv'
# NOTE(review): not referenced in any visible cell -- confirm whether it is still needed.
PRIOR_LIST = ['2']

# Switches read by count_sign(): which multiple-testing criteria produce a marker.
BONFERRONI = True  # '*' when pval < 0.05 / number of rows
FDR = False  # '+' when the FDR-adjusted p-value < 0.05

In [8]:
# Lookup table from phenotype code (column '0') to a readable name ('full_name').
fu = (
    pd.read_csv(FINN_UK)
    .loc[:, ['0', 'finn_pheno']]
    .drop_duplicates()
    .rename(columns={'finn_pheno': 'full_name'})
)
# Two codes get their names appended by hand.
fu = pd.concat([
    fu,
    pd.DataFrame({
        '0': ['GEST_DIABETES', 'O15_PRE_OR_ECLAMPSIA'],
        'full_name': ['Gestational diabetes (for exclusion)', 'Pre-eclampsia or eclampsia'],
    }),
])
# Index by code while keeping the '0' column, so the table can be joined on its index.
fu = fu.set_index('0', drop=False)
fu

Unnamed: 0_level_0,0,full_name
0,Unnamed: 1_level_1,Unnamed: 2_level_1
E4_THYROID,E4_THYROID,Disorders of the thyroid gland
C3_THYROID_GLAND,C3_THYROID_GLAND,Malignant neoplasm of thyroid gland
C3_THYROID_GLAND_EXALLC,C3_THYROID_GLAND_EXALLC,Malignant neoplasm of thyroid gland (all cance...
E4_THYTOXNOD,E4_THYTOXNOD,Thyrotoxicosis with toxic single thyroid nodule
I9_RHEUVALV,I9_RHEUVALV,Rheumatic valve diseases
...,...,...
M13_SLE,M13_SLE,Systemic lupus erythematosus
SLE_NOS,SLE_NOS,"Systemic lupus erythematosus, unspecified"
DRUGADVERS_SYSTEMIC_LUPUS_ERYTHEMAT,DRUGADVERS_SYSTEMIC_LUPUS_ERYTHEMAT,Drug-induced systemic lupus erythematosus
GEST_DIABETES,GEST_DIABETES,Gestational diabetes (for exclusion)


In [9]:
# Short labels for the four traits; later applied to the 'p1' column of `final`.
ph_map = {'I9_HYPTENSPREG':'HP', 'GEST_DIABETES':'GD', 'O15_GESTAT_HYPERT':'GH', 'O15_EXCESS_VOMIT_PREG': 'EV'}

In [10]:
def return_df(FILE):
    """Parse a correlation result file into a DataFrame indexed by phenotype.

    The file is read as alternating lines: a header path followed by a line of
    space-separated values. ``--`` separator lines are removed before pairing.
    For each header, the first ``/``-separated component (with ``f_`` removed)
    becomes the row label and the last component (with ``.cors`` removed)
    becomes the ``trait`` value.

    Parameters
    ----------
    FILE : str
        Path of the text file to parse.

    Returns
    -------
    pandas.DataFrame
        One row per phenotype with the string columns ``Cor_All``, ``cor``,
        ``cor_std`` and ``trait``. If a phenotype occurs more than once, the
        last occurrence is kept.

    Raises
    ------
    ValueError
        If a value line does not split into exactly three fields.
    """
    columns = ['Cor_All', 'cor', 'cor_std', 'trait']
    with open(FILE, 'r') as in_f:
        lines = in_f.read().replace('\n--', '').split('\n')
    # A trailing newline leaves empty strings at the end; drop them so they are
    # never paired up as a bogus record.
    while lines and not lines[-1].strip():
        lines.pop()

    records = {}
    # Even positions are header paths, odd positions the matching value lines.
    # A dangling header without a value line is ignored by zip().
    for header, stats in zip(lines[::2], lines[1::2]):
        parts = header.split('/')
        key = parts[0].replace('f_', '')
        trait = parts[-1].replace('.cors', '')
        fields = stats.split(' ')
        if len(fields) != len(columns) - 1:
            raise ValueError(
                f'{FILE}: expected {len(columns) - 1} fields for {key!r}, '
                f'got {len(fields)}: {stats!r}'
            )
        # Build the whole row at once so a repeated key replaces the earlier
        # row instead of growing it with an extra trait entry.
        records[key] = fields + [trait]
    return pd.DataFrame(records, index=columns).T


def count_sign(data):
    """Add z-scores, p-values and significance markers to a correlation table.

    The frame is modified in place and also returned. Columns added, in order:
    ``grep_stat`` (``cor / cor_std``), ``pval`` (two-sided normal p-value),
    ``pval_fdr`` (second element returned by ``fdrcorrection``), ``p2`` (copy
    of the index), ``significant_bf``, ``significant_fdr`` and ``significant``
    (concatenation of the two markers).

    The module-level switches ``BONFERRONI`` and ``FDR`` decide which markers
    are written: ``'*'`` where ``pval < 0.05 / number of rows`` and ``'+'``
    where ``pval_fdr < 0.05``.
    """
    estimate = data.cor.astype(float)
    std_err = data.cor_std.astype(float)
    data['grep_stat'] = estimate / std_err
    # Two-sided tail probability of the z-score under a standard normal.
    data['pval'] = 2 * scipy.stats.norm.sf(data['grep_stat'].abs())
    _, adjusted = fdrcorrection(data.pval)
    data['pval_fdr'] = adjusted
    data['p2'] = data.index
    bonferroni_cutoff = 0.05 / len(data)
    for marker_column in ('significant_bf', 'significant_fdr'):
        data[marker_column] = ''
    if BONFERRONI:
        data.loc[data.pval < bonferroni_cutoff, 'significant_bf'] = '*'
    if FDR:
        data.loc[data.pval_fdr < 0.05, 'significant_fdr'] = '+'
    data['significant'] = data['significant_bf'] + data['significant_fdr']
    return data

In [11]:
# g, h = list(gest_data.index), list(hypt_data.index)
# len(g), len(h), len(set(g)), len(set(h)), len(set(g) & set(h))

In [12]:
# Parse each trait's correlation file and annotate it with significance markers.
gest_data, hypt_data, vomt_data, hprt_data = [
    count_sign(return_df(path)) for path in (GEST, HYPT, VOMT, HPRT)
]
display(gest_data, hypt_data, vomt_data, hprt_data)







Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
AB1_BACTINF_NOS,Cor_All,0.538912,0.201296,GEST_DIABETES,2.677212,0.007424,0.066275,AB1_BACTINF_NOS,,,
AB1_BACT_INTEST_OTH,Cor_All,0.496793,1.225783,GEST_DIABETES,0.405286,0.685267,0.911610,AB1_BACT_INTEST_OTH,,,
AB1_CANDIDIASIS,Cor_All,0.714293,0.934676,GEST_DIABETES,0.764215,0.444739,0.789857,AB1_CANDIDIASIS,,,
AB1_GASTROENTERITIS_NOS,Cor_All,0.472679,0.424947,GEST_DIABETES,1.112325,0.265999,0.646211,AB1_GASTROENTERITIS_NOS,,,
AB1_INFECTIONS,Cor_All,0.522366,0.238934,GEST_DIABETES,2.186236,0.028798,0.180778,AB1_INFECTIONS,,,
...,...,...,...,...,...,...,...,...,...,...,...
Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,Cor_All,0.052886,0.184335,GEST_DIABETES,0.286902,0.774188,0.934433,Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,,,
Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,Cor_All,0.028002,0.202449,GEST_DIABETES,0.138316,0.889990,0.962535,Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,,,
Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_DISEA,Cor_All,-0.295997,0.257120,GEST_DIABETES,-1.151202,0.249649,0.623738,Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_D...,,,
Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,Cor_All,1.004349,0.869686,GEST_DIABETES,1.154841,0.248156,0.622198,Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,,,


Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
AB1_BACTINF_NOS,Cor_All,0.421722,0.176204,I9_HYPTENSPREG,2.393374,0.016694,0.157770,AB1_BACTINF_NOS,,,
AB1_BACT_INTEST_OTH,Cor_All,0.117067,0.283317,I9_HYPTENSPREG,0.413201,0.679459,0.930469,AB1_BACT_INTEST_OTH,,,
AB1_CANDIDIASIS,Cor_All,-0.552782,0.766716,I9_HYPTENSPREG,-0.720974,0.470926,0.843550,AB1_CANDIDIASIS,,,
AB1_GASTROENTERITIS_NOS,Cor_All,0.308142,0.146515,I9_HYPTENSPREG,2.103143,0.035453,0.245886,AB1_GASTROENTERITIS_NOS,,,
AB1_INFECTIONS,Cor_All,0.215657,0.099965,I9_HYPTENSPREG,2.157325,0.030980,0.227719,AB1_INFECTIONS,,,
...,...,...,...,...,...,...,...,...,...,...,...
Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,Cor_All,0.450626,0.157268,I9_HYPTENSPREG,2.865338,0.004166,0.056864,Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,,,
Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,Cor_All,0.341246,0.227455,I9_HYPTENSPREG,1.500279,0.133542,0.524412,Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,,,
Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_DISEA,Cor_All,-0.572918,0.262027,I9_HYPTENSPREG,-2.186485,0.028780,0.215226,Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_D...,,,
Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,Cor_All,0.481440,0.770888,I9_HYPTENSPREG,0.624527,0.532282,0.875576,Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,,,


Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
AB1_BACTINF_NOS,Cor_All,0.591617,0.496331,O15_EXCESS_VOMIT_PREG,1.191981,0.233269,0.929492,AB1_BACTINF_NOS,,,
AB1_BACT_INTEST_OTH,Cor_All,0.674803,1.633649,O15_EXCESS_VOMIT_PREG,0.413065,0.679559,0.950482,AB1_BACT_INTEST_OTH,,,
AB1_CANDIDIASIS,Cor_All,-0.304160,1.205404,O15_EXCESS_VOMIT_PREG,-0.252330,0.800786,0.952547,AB1_CANDIDIASIS,,,
AB1_GASTROENTERITIS_NOS,Cor_All,1.496180,0.981609,O15_EXCESS_VOMIT_PREG,1.524212,0.127456,0.929492,AB1_GASTROENTERITIS_NOS,,,
AB1_INFECTIONS,Cor_All,1.355828,0.745581,O15_EXCESS_VOMIT_PREG,1.818485,0.068990,0.929492,AB1_INFECTIONS,,,
...,...,...,...,...,...,...,...,...,...,...,...
Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,Cor_All,0.634431,0.473453,O15_EXCESS_VOMIT_PREG,1.340008,0.180243,0.929492,Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,,,
Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,Cor_All,0.119258,0.466003,O15_EXCESS_VOMIT_PREG,0.255917,0.798015,0.952547,Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,,,
Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_DISEA,Cor_All,1.502852,0.930034,O15_EXCESS_VOMIT_PREG,1.615911,0.106114,0.929492,Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_D...,,,
Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,Cor_All,0.572200,0.518828,O15_EXCESS_VOMIT_PREG,1.102870,0.270083,0.929492,Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,,,


Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
AB1_BACTINF_NOS,Cor_All,0.326080,0.192789,O15_GESTAT_HYPERT,1.691383,0.090764,0.560897,AB1_BACTINF_NOS,,,
AB1_BACT_INTEST_OTH,Cor_All,0.088091,0.305635,O15_GESTAT_HYPERT,0.288223,0.773176,0.937238,AB1_BACT_INTEST_OTH,,,
AB1_CANDIDIASIS,Cor_All,-0.546461,0.776607,O15_GESTAT_HYPERT,-0.703652,0.481650,0.892705,AB1_CANDIDIASIS,,,
AB1_GASTROENTERITIS_NOS,Cor_All,0.230596,0.140439,O15_GESTAT_HYPERT,1.641966,0.100597,0.575938,AB1_GASTROENTERITIS_NOS,,,
AB1_INFECTIONS,Cor_All,0.176180,0.110236,O15_GESTAT_HYPERT,1.598207,0.109997,0.594951,AB1_INFECTIONS,,,
...,...,...,...,...,...,...,...,...,...,...,...
Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,Cor_All,0.373574,0.155309,O15_GESTAT_HYPERT,2.405360,0.016157,0.217104,Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,,,
Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,Cor_All,0.210194,0.230249,O15_GESTAT_HYPERT,0.912899,0.361296,0.861247,Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,,,
Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_DISEA,Cor_All,-0.357972,0.211924,O15_GESTAT_HYPERT,-1.689153,0.091190,0.560897,Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_D...,,,
Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,Cor_All,0.325529,0.541916,O15_GESTAT_HYPERT,0.600700,0.548040,0.893979,Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,,,


In [13]:
# Stack the four per-trait tables row-wise; the index keeps the phenotype codes,
# so a code can appear once per trait.
concatted_data = pd.concat([gest_data, hypt_data, vomt_data, hprt_data])
concatted_data

Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
AB1_BACTINF_NOS,Cor_All,0.538912,0.201296,GEST_DIABETES,2.677212,0.007424,0.066275,AB1_BACTINF_NOS,,,
AB1_BACT_INTEST_OTH,Cor_All,0.496793,1.225783,GEST_DIABETES,0.405286,0.685267,0.911610,AB1_BACT_INTEST_OTH,,,
AB1_CANDIDIASIS,Cor_All,0.714293,0.934676,GEST_DIABETES,0.764215,0.444739,0.789857,AB1_CANDIDIASIS,,,
AB1_GASTROENTERITIS_NOS,Cor_All,0.472679,0.424947,GEST_DIABETES,1.112325,0.265999,0.646211,AB1_GASTROENTERITIS_NOS,,,
AB1_INFECTIONS,Cor_All,0.522366,0.238934,GEST_DIABETES,2.186236,0.028798,0.180778,AB1_INFECTIONS,,,
...,...,...,...,...,...,...,...,...,...,...,...
Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,Cor_All,0.373574,0.155309,O15_GESTAT_HYPERT,2.405360,0.016157,0.217104,Z21_PRESENCE_CARDIAC_VASCULAR_IMPLANTNTS_GRAFTS,,,
Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,Cor_All,0.210194,0.230249,O15_GESTAT_HYPERT,0.912899,0.361296,0.861247,Z21_PROCED_PURPO_OTH_REMED_HEALTH_STATE,,,
Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_DISEA,Cor_All,-0.357972,0.211924,O15_GESTAT_HYPERT,-1.689153,0.091190,0.560897,Z21_SPECIAL_SCREEN_EXAM_INFECTIOUS_PARASITIC_D...,,,
Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,Cor_All,0.325529,0.541916,O15_GESTAT_HYPERT,0.600700,0.548040,0.893979,Z21_SPECIAL_SCREEN_EXAM_OTH_DISEA_DISORD,,,


In [14]:
# Rows that carry at least one significance marker.
filtered_concatted_data = concatted_data[concatted_data.significant!='']
# Keep every row (for all traits) of any phenotype that is marked for at least one trait.
cd_final = concatted_data[concatted_data.p2.isin(set(filtered_concatted_data.index))]
cd_final

Unnamed: 0,Cor_All,cor,cor_std,trait,grep_stat,pval,pval_fdr,p2,significant_bf,significant_fdr,significant
E4_DIABETES,Cor_All,1.107069,0.193941,GEST_DIABETES,5.708277,1.141254e-08,1.962957e-06,E4_DIABETES,*,,*
E4_OBESITY,Cor_All,0.680994,0.187512,GEST_DIABETES,3.631736,2.815215e-04,7.336621e-03,E4_OBESITY,,,
FG_DOAAC,Cor_All,0.446735,0.126326,GEST_DIABETES,3.536366,4.056719e-04,8.721946e-03,FG_DOAAC,,,
FG_OTHHEART,Cor_All,0.313117,0.139646,GEST_DIABETES,2.242220,2.494718e-02,1.650352e-01,FG_OTHHEART,,,
G6_NERPLEX,Cor_All,0.845052,0.152787,GEST_DIABETES,5.530916,3.185636e-08,4.523593e-06,G6_NERPLEX,*,,*
...,...,...,...,...,...,...,...,...,...,...,...
O15_PREG_MATERN_CARE,Cor_All,-0.097986,0.272672,O15_GESTAT_HYPERT,-0.359355,7.193297e-01,9.313002e-01,O15_PREG_MATERN_CARE,,,
O15_PREG_OTHER_MAT_DISORD,Cor_All,0.174033,0.119531,O15_GESTAT_HYPERT,1.455965,1.454022e-01,6.555842e-01,O15_PREG_OTHER_MAT_DISORD,,,
O15_PRE_OR_ECLAMPSIA,Cor_All,0.987726,0.132211,O15_GESTAT_HYPERT,7.470831,7.969012e-14,7.614834e-12,O15_PRE_OR_ECLAMPSIA,*,,*
R18_DIZZI_GIDDI,Cor_All,0.687925,0.166724,O15_GESTAT_HYPERT,4.126131,3.689178e-05,1.982933e-03,R18_DIZZI_GIDDI,*,,*


In [15]:
# Attach the readable phenotype name from `fu`, matching on the index (phenotype code).
final = pd.merge(cd_final, fu,  how="left", left_index=True, right_index=True)

# Every phenotype code must have found a name in `fu`.
assert final[final.full_name.isna()].shape[0] == 0

# Rename to the column names used in the saved output.
col_rename = {"cor": "rg", "cor_std": "se", "trait": "p1", "grep_stat": "z", "pval": "p", "p2": "p2"}
final = final.rename(columns=col_rename)

final.p1 = final.p1.apply(lambda x: ph_map[x])  # trait code -> short label; KeyError for an unmapped trait
final.p2 = final.full_name  # replace the phenotype code with its readable name


print(final.shape)
final

(164, 13)


Unnamed: 0,Cor_All,rg,se,p1,z,p,pval_fdr,p2,significant_bf,significant_fdr,significant,0,full_name
E4_DIABETES,Cor_All,1.107069,0.193941,GD,5.708277,1.141254e-08,1.962957e-06,Diabetes mellitus,*,,*,E4_DIABETES,Diabetes mellitus
E4_DIABETES,Cor_All,0.400351,0.061134,HP,6.548745,5.802246e-11,4.158276e-09,Diabetes mellitus,*,,*,E4_DIABETES,Diabetes mellitus
E4_DIABETES,Cor_All,0.485316,0.332880,EV,1.457931,1.448596e-01,9.294918e-01,Diabetes mellitus,,,,E4_DIABETES,Diabetes mellitus
E4_DIABETES,Cor_All,0.283170,0.060705,GH,4.664690,3.090828e-06,2.044702e-04,Diabetes mellitus,*,,*,E4_DIABETES,Diabetes mellitus
E4_OBESITY,Cor_All,0.680994,0.187512,GD,3.631736,2.815215e-04,7.336621e-03,Obesity,,,,E4_OBESITY,Obesity
...,...,...,...,...,...,...,...,...,...,...,...,...,...
R18_DIZZI_GIDDI,Cor_All,0.687925,0.166724,GH,4.126131,3.689178e-05,1.982933e-03,Dizziness and giddiness,*,,*,R18_DIZZI_GIDDI,Dizziness and giddiness
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,Cor_All,0.568640,0.133631,GD,4.255300,2.087689e-05,1.122133e-03,Symptoms and signs involving the circulatory a...,*,,*,R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYS...,Symptoms and signs involving the circulatory a...
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,Cor_All,0.301865,0.098061,HP,3.078339,2.081580e-03,3.315109e-02,Symptoms and signs involving the circulatory a...,,,,R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYS...,Symptoms and signs involving the circulatory a...
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,Cor_All,0.715299,0.468161,EV,1.527891,1.265396e-01,9.294918e-01,Symptoms and signs involving the circulatory a...,,,,R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYS...,Symptoms and signs involving the circulatory a...


In [16]:
# Prefix the labels of these two codes with the code itself
# (presumably because both share the same full name -- confirm).
# The label is rebuilt from `full_name`, which `p2` was copied from, rather than
# from `p2` itself, so re-running this cell does not stack the prefix twice.
for c in ['I9_OTHHEART', 'FG_OTHHEART']:
    is_code = final.index == c
    final.loc[is_code, 'p2'] = f'{c}: ' + final.loc[is_code, 'full_name']

In [17]:
# Check if p2 is correlated with itself (and only with itself)

In [18]:
# Rows whose phenotype (index) is itself one of the analysed traits in ph_map.
final[final.index.isin(list(ph_map))]

Unnamed: 0,Cor_All,rg,se,p1,z,p,pval_fdr,p2,significant_bf,significant_fdr,significant,0,full_name
GEST_DIABETES,Cor_All,1.000274,0.000331,GD,3021.975831,0.0,0.0,Gestational diabetes (for exclusion),*,,*,GEST_DIABETES,Gestational diabetes (for exclusion)
GEST_DIABETES,Cor_All,0.339312,0.12108,HP,2.802379,0.005072731,0.06511266,Gestational diabetes (for exclusion),,,,GEST_DIABETES,Gestational diabetes (for exclusion)
GEST_DIABETES,Cor_All,0.847602,0.532323,EV,1.59227,0.111324,0.9294918,Gestational diabetes (for exclusion),,,,GEST_DIABETES,Gestational diabetes (for exclusion)
GEST_DIABETES,Cor_All,0.290859,0.134381,GH,2.164435,0.03043095,0.3067286,Gestational diabetes (for exclusion),,,,GEST_DIABETES,Gestational diabetes (for exclusion)
I9_HYPTENSPREG,Cor_All,0.339312,0.12108,GD,2.802379,0.005072731,0.0513241,"Hypertension complicating pregnancy, childbirt...",,,,I9_HYPTENSPREG,"Hypertension complicating pregnancy, childbirt..."
I9_HYPTENSPREG,Cor_All,1.000058,5.2e-05,HP,19231.884615,0.0,0.0,"Hypertension complicating pregnancy, childbirt...",*,,*,I9_HYPTENSPREG,"Hypertension complicating pregnancy, childbirt..."
I9_HYPTENSPREG,Cor_All,0.127947,0.309521,EV,0.413371,0.6793348,0.9504818,"Hypertension complicating pregnancy, childbirt...",,,,I9_HYPTENSPREG,"Hypertension complicating pregnancy, childbirt..."
I9_HYPTENSPREG,Cor_All,1.01888,0.032024,GH,31.816138,3.872208e-222,1.1100330000000001e-219,"Hypertension complicating pregnancy, childbirt...",*,,*,I9_HYPTENSPREG,"Hypertension complicating pregnancy, childbirt..."
O15_EXCESS_VOMIT_PREG,Cor_All,0.847602,0.532323,GD,1.59227,0.111324,0.441192,Excessive vomiting in pregnancy,,,,O15_EXCESS_VOMIT_PREG,Excessive vomiting in pregnancy
O15_EXCESS_VOMIT_PREG,Cor_All,0.127947,0.309521,HP,0.413371,0.6793348,0.9304693,Excessive vomiting in pregnancy,,,,O15_EXCESS_VOMIT_PREG,Excessive vomiting in pregnancy


`O15_EXCESS_VOMIT_PREG` and `GEST_DIABETES` (as p2) show a significant correlation only with themselves (as p1), so both are removed.

In [19]:
# Drop every row of these two phenotypes (selected by index label).
final = final[~final.index.isin(['O15_EXCESS_VOMIT_PREG', 'GEST_DIABETES'])]

In [20]:
# Short trait labels, in the order the per-trait loop iterates over them.
ph_map.values()

dict_values(['HP', 'GD', 'GH', 'EV'])

In [21]:
# Print, for each trait label, the shape of the table of rows that carry a significance marker.
flagged = final[final.significant != '']
for k in ph_map.values():
    dk = flagged[flagged.p1 == k]
    print(k, dk.shape, sep='\n')

HP
(29, 13)
GD
(17, 13)
GH
(16, 13)
EV
(0, 13)


In [22]:
# Sum of the flagged-row counts printed per trait (HP + GD + GH; EV has none).
29+17+16

62

In [16]:
# Select the output columns and write them to CSV; the index (phenotype code) is written too.
data_to_save = final.loc[:,['p1', 'p2', 'rg', 'se', 'z', 'p', 'significant']]
data_to_save.to_csv('./out_data_fg/feature.csv', index=True)
data_to_save

Unnamed: 0,p1,p2,rg,se,z,p,significant
E4_DIABETES,GD,Diabetes mellitus,1.107069,0.193941,5.708277,1.141254e-08,*
E4_DIABETES,HP,Diabetes mellitus,0.400351,0.061134,6.548745,5.802246e-11,*
E4_DIABETES,EV,Diabetes mellitus,0.485316,0.332880,1.457931,1.448596e-01,
E4_DIABETES,GH,Diabetes mellitus,0.283170,0.060705,4.664690,3.090828e-06,*
E4_OBESITY,GD,Obesity,0.680994,0.187512,3.631736,2.815215e-04,
...,...,...,...,...,...,...,...
R18_DIZZI_GIDDI,GH,Dizziness and giddiness,0.687925,0.166724,4.126131,3.689178e-05,*
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,GD,Symptoms and signs involving the circulatory a...,0.568640,0.133631,4.255300,2.087689e-05,*
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,HP,Symptoms and signs involving the circulatory a...,0.301865,0.098061,3.078339,2.081580e-03,
R18_SYMPTOMS_SIGNS_INVOLVI_CIRCULATO_RESPI_SYSTEMS,EV,Symptoms and signs involving the circulatory a...,0.715299,0.468161,1.527891,1.265396e-01,


In [25]:
# `p2` labels (phenotype full names) whose rows are written to feature_supp.csv.
cols_supp = ['Soft tissue disorders',
'Diseases of the musculoskeletal system and connective tissue',
'Peripheral artery disease',
'Major coronary heart disease event',
'Hypertension, essential',
'Heart failure and antihypertensive medication',
'Myocardial infarction',
'Ischemic heart diseases',
'Diabetes mellitus',
'Obesity',]

# `p2` labels (phenotype full names) whose rows are written to feature_not_supp.csv.
cols_not_supp = ['Polyarthropathies',
'Nerve, nerve root and plexus disorders',
'Gastrointestinal diseases',
'Diseases of the ear and mastoid process',
'Dizziness and giddiness',]
print(len(cols_supp), len(cols_not_supp))

10 5


In [29]:
# Write the rows whose `p2` label is listed in cols_supp / cols_not_supp to separate CSV files.
data_to_save[data_to_save.p2.isin(cols_supp)].to_csv('./out_data_fg/feature_supp.csv', index=True)
data_to_save[data_to_save.p2.isin(cols_not_supp)].to_csv('./out_data_fg/feature_not_supp.csv', index=True)

In [63]:
# data_to_save[data_to_save.p2 != 'O15_PRETERM___O60'].to_csv('./data/feature4.csv', index=False)

In [83]:
# NOTE(review): presumably the row count of `final` divided by the 4 traits -- confirm where 156 comes from.
156/4

39.0