In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import scipy as sp
import matplotlib.pyplot as plt
import re
from os import listdir
from os.path import isfile, join
from scipy import stats
from matplotlib.ticker import MaxNLocator

import warnings
# NOTE(review): with no category or module given, this filter silences every
# warning for the rest of the session, so deprecation or chained-assignment
# warnings raised by later cells will not be shown. Consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
# Gene symbols of special interest; intersected with rule antecedents further down.
important = [
    'TLR3', 'TBPL2', 'GBP2', 'GBC4',
    'N4BP2', 'AURKA', 'RRM2', 'TXK',
]

In [3]:
# Load the association rules table for the male cohort.
df = pd.read_csv(filepath_or_buffer="./Results/rules_males_full.csv")

In [4]:
# Split every rule string into its left-hand side (antecedent) and right-hand
# side (consequent): the first pattern captures the braces followed by a space,
# the second the braces preceded by a space.
# NOTE(review): re.search returns None for a string that matches neither
# pattern, in which case .group raises AttributeError.
df['lhs'] = df['rules'].apply(lambda x: re.search(r"\{(.+)\} ", x).group(1))
df['rhs'] = df['rules'].apply(lambda x: re.search(r" \{(.+)\}", x).group(1))

# Drop the raw rule text, put the parsed sides first and renumber rows 0..n-1.
df = df.drop(columns='rules')
df = df[['lhs', 'rhs', 'support', 'confidence', 'lift', 'count', 'coverage', 'pvalue_test']]
df = df.reset_index(drop=True)

# Both replacements are literal. regex=False is spelled out because the default
# of `regex` differs between pandas releases and '.' is a regex metacharacter.
df['lhs'] = (
    df['lhs']
    .str.replace('=1', '', regex=False)
    .str.replace('KRTAP27.1_1', 'KRTAP27-1_1', regex=False)
)
df['count'] = df['count'].astype('int32')

# Highest-support rules first; later cells take the top rows with iloc.
df = df.sort_values(by=['support'], ascending=False)

In [5]:
# Minimum confidence / support a rule must reach to be kept for rhs 'grading=1'.
minconf_1, minsup_1 = 0.8, 0.08

In [6]:
# Minimum confidence / support a rule must reach to be kept for rhs 'grading=0'.
minconf_0, minsup_0 = 0.8, 0.12

In [7]:
# Load the boolean feature table for the male cohort.
pat = pd.read_csv(filepath_or_buffer='./Data/AssociationRules/All_Male_bool.csv')

In [8]:
# Rows of the feature table whose grading equals 1.
pat1 = pat[pat['grading'] == 1]

# Rules whose consequent is grading=1 and that reach both thresholds.
# .copy() makes grading1 an independent frame, so the column assignment below
# is not a write into a filtered selection of df.
grading1 = df[(df['confidence'] >= minconf_1)
              & (df['rhs'] == 'grading=1')
              & (df['support'] >= minsup_1)].copy()

# Literal '.' -> '-' replacement. regex=False is explicit because '.' is a
# regex metacharacter and the default of `regex` differs between pandas releases.
grading1['lhs'] = grading1['lhs'].str.replace('.', '-', regex=False)

# Keep the first 28 rules in the current row order of df.
sel_sev = grading1.iloc[0:28]

In [9]:
# Rows of the feature table whose grading equals 0.
pat0 = pat[pat['grading'] == 0]

# Rules whose consequent is grading=0 and that reach both thresholds.
# .copy() makes grading0 an independent frame, so the column assignments below
# are not writes into a filtered selection of df.
grading0 = df[(df['confidence'] >= minconf_0)
              & (df['rhs'] == 'grading=0')
              & (df['support'] >= minsup_0)].copy()

# Genes from `important` that occur in each antecedent. `important` holds bare
# symbols (e.g. 'TLR3'), so the text before the first '_' of each item is
# compared; intersecting whole items such as 'TLR3_1' with bare symbols can
# never match.
# NOTE(review): assumes the gene symbol is everything before the first '_' —
# confirm against the item naming scheme.
grading0['important'] = grading0['lhs'].apply(
    lambda x: np.intersect1d([item.split('_')[0] for item in x.split(',')], important)
)

# Literal '.' -> '-' replacement. regex=False is explicit because '.' is a
# regex metacharacter and the default of `regex` differs between pandas releases.
grading0['lhs'] = grading0['lhs'].str.replace('.', '-', regex=False)

# Keep the first 32 rules in the current row order of df.
sel_prot = grading0.iloc[0:32]

# Statistical analysis of top severity rules

In [10]:
# Phenotype table keyed by PatientID, and the subset whose adjusted grading
# column holds the string '1'.
pheno = pd.read_csv('./Data/phenotypes_1319.csv', sep=';').set_index('PatientID')
pheno1 = pheno.loc[pheno['grading_1319_adj_a_s'] == '1']

# Boolean feature table that also carries PatientID, restricted the same way.
pat_index = pd.read_csv('./Data/AssociationRules/All_Male_bool_index.csv')
pat_index_1 = pat_index.loc[pat_index['grading'] == '1']

In [11]:
# Shorten the verbose questionnaire headers of pheno1 to plain condition names.
pheno1.rename(
    mapper={
        'diabetes (1=affected, 0=not affected, N/A= data not available)': 'Diabetes',
        'hypertension (1=affected, 0=not affected, N/A= data not available)': 'Hypertension',
        'cancer  (1=affected, 0=not affected, N/A= data not available)': 'Cancer',
        'Autoimmune disease (1 = Yes, 0 = No)': 'Autoimmune disease',
        'Congestive/Ischemic Heart Failure (1= Yes; 0=No)': 'Congestive/Ischemic Heart Failure',
        'Obesity (1= Yes; 0=No)': 'Obesity',
        'Dyslipidemia (1= Yes; 0=No)': 'Dyslipidemia',
        'Hypothyroidism (1= Yes; 0=No)': 'Hypothyroidism',
        'Asthma/COPD/OSAS (1= Yes; 0=No)': 'Asthma/COPD/OSAS',
        'comorbidity_coded (1 = any comorbidity, 0 = none)': 'comorbidity_coded',
    },
    axis='columns',
    inplace=True,
)

In [12]:
# For every selected severity rule, build three parallel lists:
#   genes    - the items of the rule's left-hand side
#   series   - boolean mask over pat_index_1 rows where all of those columns equal 1
#   patients - PatientID values of the rows selected by that mask
genes = [antecedent.split(',') for antecedent in sel_sev['lhs']]
series = [
    pd.Series((pat_index_1[gene_set].values == 1).all(axis=1), name='bools')
    for gene_set in genes
]
patients = [pat_index_1[mask.values].PatientID.values for mask in series]

In [13]:
# Column accumulator for the severity summary: one independent, initially empty
# list per output column, in the order the columns should appear.
d_sev = {
    column: []
    for column in (
        'lhs', 'count', 'support', 'confidence',
        'mean_age', 'below_average', 'std_age', 'min_age',
        '25%_age', '50%_age', '75%_age', 'max_age',
        'mean_grading', 'std_grading',
        '0', '1', '2', '3', '4',
        'score', 'blood_group',
        'diabetes', 'hypertension', 'cancer', 'autoimmune_disease',
        'congestive_ischemic_heart_failure', 'asthma', 'hypothyroidism',
        'obesity', 'dyslipidemia', 'comorbidity_coded',
        'heart', 'liver', 'pancreas', 'Lymphoid System',
        'Proinflammatory Cytokines', 'Clotting system', 'kidney',
        'patients',
    )
}


In [14]:
# Fill d_sev with one entry per selected severity rule, summarising the
# phenotype rows (pheno1) of the patients that match the rule.

# Start from empty columns so that re-running this cell does not append a
# second copy of every row.
for column_values in d_sev.values():
    column_values.clear()

# d_sev key -> pheno1 column, for the columns whose share of value 1 is reported.
coded_columns = {
    'diabetes': 'Diabetes',
    'hypertension': 'Hypertension',
    'cancer': 'Cancer',
    'autoimmune_disease': 'Autoimmune disease',
    'congestive_ischemic_heart_failure': 'Congestive/Ischemic Heart Failure',
    'asthma': 'Asthma/COPD/OSAS',
    'hypothyroidism': 'Hypothyroidism',
    'dyslipidemia': 'Dyslipidemia',
    'obesity': 'Obesity',
    'comorbidity_coded': 'comorbidity_coded',
}

# Text columns (same name in pheno1 and d_sev) for which the share of rows
# containing 'yes', case-insensitively, is reported.
yes_no_columns = ['heart', 'liver', 'Proinflammatory Cytokines', 'pancreas',
                  'kidney', 'Lymphoid System', 'Clotting system']

for i in range(len(sel_sev)):
    rule = sel_sev.iloc[i]
    d_sev['lhs'].append(rule['lhs'])
    d_sev['support'].append(rule['support'])
    d_sev['confidence'].append(rule['confidence'])
    d_sev['patients'].append(' '.join(patients[i]))

    # Phenotype rows of the patients matching this rule.
    selected_sev = pheno1.loc[patients[i]]
    n_selected = len(selected_sev)
    d_sev['count'].append(n_selected)

    # Age distribution. The numpy reducers are kept as they were so the
    # reported values (e.g. the ddof used for the standard deviation) stay the same.
    age = selected_sev['Age']
    mean_age = np.mean(age)
    d_sev['mean_age'].append(mean_age)
    d_sev['std_age'].append(np.std(age))
    d_sev['min_age'].append(np.amin(age))
    d_sev['max_age'].append(np.amax(age))
    # NOTE(review): 59 is a hard-coded reference age, presumably the cohort
    # average given the column name — confirm and consider a named constant.
    d_sev['below_average'].append(bool(mean_age <= 59))
    for q in (25, 50, 75):
        d_sev[str(q) + '%_age'].append(np.percentile(age, q))

    # Grading distribution: mean, spread and the number of patients per grade 0..4.
    grading = selected_sev['grading']
    mean_grading = np.mean(grading)
    d_sev['mean_grading'].append(mean_grading)
    d_sev['std_grading'].append(np.std(grading))
    for grade in range(5):
        d_sev[str(grade)].append(np.count_nonzero(grading == grade))

    d_sev['score'].append(mean_age * (4 - mean_grading))

    # Most frequent blood group. Series.mode() handles string labels and
    # returns the modes sorted, so ties resolve to the smallest label;
    # scipy.stats.mode no longer accepts non-numeric data and returns a scalar
    # for 1-D input in recent SciPy, which breaks `[0][0]` indexing.
    blood_modes = selected_sev['Blood Group'].mode()
    d_sev['blood_group'].append(blood_modes.iloc[0] if len(blood_modes) else np.nan)

    # Share of patients coded 1 in each comorbidity column. .get(1, 0) yields 0
    # when nobody is coded 1 (instead of raising) and still counts correctly
    # when every patient is coded 1.
    for key, column in coded_columns.items():
        n_affected = selected_sev[column].value_counts().get(1, 0)
        d_sev[key].append(n_affected / n_selected)

    # Share of patients whose entry contains 'yes'; missing values count as no.
    for column in yes_no_columns:
        involved = selected_sev[column].str.contains('yes', case=False, na=False)
        d_sev[column].append(involved.sum() / n_selected)

In [15]:
# One row per severity rule, one column per d_sev key.
description_sev = pd.DataFrame(d_sev)

In [16]:
# Display the severity rules ordered by increasing score.
description_sev.sort_values('score')

Unnamed: 0,lhs,count,support,confidence,mean_age,below_average,std_age,min_age,25%_age,50%_age,...,dyslipidemia,comorbidity_coded,heart,liver,pancreas,Lymphoid System,Proinflammatory Cytokines,Clotting system,kidney,patients
11,"OTOP2_1,OR7A17_2,C4orf36_1,TGIF2LX_1_hemi",47,0.091153,0.809524,60.042553,False,11.799671,32.0,55.0,61.0,...,0.021277,0.510638,0.340426,0.489362,0.12766,0.212766,0.297872,0.191489,0.319149,AR-COV-14 AR-COV-33 BS-COV-102 BS-COV-35 BS-CO...
22,"FGD2_1,TLR3_1,PSKH2_1,IL17RC_1_homo",39,0.085791,0.864865,61.307692,False,12.318745,31.0,52.0,63.0,...,0.0,0.564103,0.435897,0.461538,0.153846,0.179487,0.282051,0.102564,0.307692,AR-COV-24 BS-COV-21 BS-COV-2 BS-COV-34 BS-COV-...
2,"FGD2_1,TLR3_1,PSKH2_1,FOXR2_1_hemi",47,0.107239,0.8,60.787234,False,12.761205,30.0,50.5,65.0,...,0.021277,0.531915,0.404255,0.468085,0.148936,0.170213,0.276596,0.12766,0.276596,AR-COV-14 AR-COV-24 BS-COV-17 BS-COV-21 BS-COV...
5,"ADAM15_1,LRRC18_1,OTOP2_1,GCAT_2",46,0.096515,0.818182,58.521739,True,12.596996,20.0,51.25,60.5,...,0.0,0.521739,0.347826,0.608696,0.152174,0.304348,0.304348,0.173913,0.26087,BS-COV-106 BS-COV-15 BS-COV-35 BS-COV-3 BS-COV...
7,"MSH4_1,ADAM15_1,FGD2_1,IL17RC_1_homo",49,0.093834,0.813953,59.755102,False,12.821428,20.0,51.0,61.0,...,0.0,0.571429,0.306122,0.428571,0.102041,0.244898,0.306122,0.102041,0.265306,AR-COV-24 BS-COV-2 BS-COV-34 BS-COV-78 TV-COV-...
12,"TBPL2_1,PCDHB15_2,HNRNPA1L2_2,FIGLA_1_hemi",41,0.091153,0.809524,61.634146,False,9.534932,40.0,55.0,62.0,...,0.04878,0.560976,0.439024,0.634146,0.170732,0.365854,0.390244,0.243902,0.292683,AR-COV-14 BS-COV-15 BS-COV-47 BS-COV-6 BS-COV-...
17,"GORAB_1,FGD2_1,TLR3_1,PSKH2_1",41,0.088472,0.825,59.365854,False,12.26885,30.0,50.0,63.0,...,0.02439,0.536585,0.390244,0.536585,0.146341,0.170732,0.317073,0.146341,0.243902,AR-COV-14 AR-COV-24 BS-COV-17 BS-COV-21 BS-COV...
18,"MSH4_1,TLR3_1,IL17RC_1_homo,TGIF2LX_1_hemi",45,0.088472,0.825,62.822222,False,10.324273,40.0,56.0,65.0,...,0.0,0.688889,0.444444,0.533333,0.244444,0.244444,0.244444,0.155556,0.333333,AR-COV-25 BS-COV-102 BS-COV-34 BS-COV-47 BS-CO...
23,"MUC5AC_1,TFRC_1,MTERF4_2,GMNC_1",45,0.085791,0.820513,59.133333,False,13.027492,20.0,53.0,60.0,...,0.088889,0.644444,0.288889,0.555556,0.155556,0.222222,0.355556,0.2,0.2,AR-COV-10 AR-COV-18 AR-COV-24 BS-COV-15 BS-COV...
27,"ADAM15_1,GCAT_2,LRRFIP1_1,RBBP6_2",38,0.08311,0.815789,60.052632,False,11.713571,20.0,53.0,60.0,...,0.0,0.473684,0.263158,0.605263,0.105263,0.210526,0.315789,0.210526,0.236842,AR-COV-18 BS-COV-102 BS-COV-106 BS-COV-35 SM-C...


In [17]:
# Write the severity summary, including its row index, as a ';'-separated file.
description_sev.to_csv('./males_severity_stats_full.csv', sep=';', index=True)

# Statistical analysis of top protection rules

In [18]:
# Phenotype table keyed by PatientID, and the subset whose adjusted grading
# column holds the string '0'.
pheno = pd.read_csv('./Data/phenotypes_1319.csv', sep=';').set_index('PatientID')
pheno0 = pheno.loc[pheno['grading_1319_adj_a_s'] == '0']

# Boolean feature table that also carries PatientID, restricted the same way.
pat_index = pd.read_csv('./Data/AssociationRules/All_Male_bool_index.csv')
pat_index_0 = pat_index.loc[pat_index['grading'] == '0']

In [19]:
# Shorten the verbose questionnaire headers of pheno0 to plain condition names.
pheno0.rename(
    mapper={
        'diabetes (1=affected, 0=not affected, N/A= data not available)': 'Diabetes',
        'hypertension (1=affected, 0=not affected, N/A= data not available)': 'Hypertension',
        'cancer  (1=affected, 0=not affected, N/A= data not available)': 'Cancer',
        'Autoimmune disease (1 = Yes, 0 = No)': 'Autoimmune disease',
        'Congestive/Ischemic Heart Failure (1= Yes; 0=No)': 'Congestive/Ischemic Heart Failure',
        'Obesity (1= Yes; 0=No)': 'Obesity',
        'Dyslipidemia (1= Yes; 0=No)': 'Dyslipidemia',
        'Hypothyroidism (1= Yes; 0=No)': 'Hypothyroidism',
        'Asthma/COPD/OSAS (1= Yes; 0=No)': 'Asthma/COPD/OSAS',
        'comorbidity_coded (1 = any comorbidity, 0 = none)': 'comorbidity_coded',
    },
    axis='columns',
    inplace=True,
)

In [20]:
# For every selected protection rule, build three parallel lists:
#   genes    - the items of the rule's left-hand side
#   series   - boolean mask over pat_index_0 rows where all of those columns equal 1
#   patients - PatientID values of the rows selected by that mask
genes = [antecedent.split(',') for antecedent in sel_prot['lhs']]
series = [
    pd.Series((pat_index_0[gene_set].values == 1).all(axis=1), name='bools')
    for gene_set in genes
]
patients = [pat_index_0[mask.values].PatientID.values for mask in series]

In [21]:
# Column accumulator for the protection summary: one independent, initially
# empty list per output column, in the order the columns should appear.
d_prot = {
    column: []
    for column in (
        'lhs', 'count', 'support', 'confidence',
        'mean_age', 'above_average', 'std_age', 'min_age',
        '25%_age', '50%_age', '75%_age', 'max_age',
        'mean_grading', 'std_grading',
        '0', '1', '2', '3', '4',
        'score', 'blood_group',
        'diabetes', 'hypertension', 'cancer', 'autoimmune_disease',
        'congestive_ischemic_heart_failure', 'asthma', 'hypothyroidism',
        'obesity', 'dyslipidemia', 'comorbidity_coded',
        'heart', 'liver', 'pancreas', 'Lymphoid System',
        'Proinflammatory Cytokines', 'Clotting system', 'kidney',
        'patients',
    )
}

In [22]:
# Fill d_prot with one entry per selected protection rule, summarising the
# phenotype rows (pheno0) of the patients that match the rule.

# Start from empty columns so that re-running this cell does not append a
# second copy of every row.
for column_values in d_prot.values():
    column_values.clear()

# d_prot key -> pheno0 column, for the columns whose share of value 1 is reported.
coded_columns = {
    'diabetes': 'Diabetes',
    'hypertension': 'Hypertension',
    'cancer': 'Cancer',
    'autoimmune_disease': 'Autoimmune disease',
    'congestive_ischemic_heart_failure': 'Congestive/Ischemic Heart Failure',
    'asthma': 'Asthma/COPD/OSAS',
    'hypothyroidism': 'Hypothyroidism',
    'dyslipidemia': 'Dyslipidemia',
    'obesity': 'Obesity',
    'comorbidity_coded': 'comorbidity_coded',
}

# Text columns (same name in pheno0 and d_prot) for which the share of rows
# containing 'yes', case-insensitively, is reported.
yes_no_columns = ['heart', 'liver', 'Proinflammatory Cytokines', 'pancreas',
                  'kidney', 'Lymphoid System', 'Clotting system']

for i in range(len(sel_prot)):
    rule = sel_prot.iloc[i]
    d_prot['lhs'].append(rule['lhs'])
    d_prot['support'].append(rule['support'])
    d_prot['confidence'].append(rule['confidence'])
    d_prot['patients'].append(' '.join(patients[i]))

    # Phenotype rows of the patients matching this rule.
    selected_prot = pheno0.loc[patients[i]]
    n_selected = len(selected_prot)
    d_prot['count'].append(n_selected)

    # Age distribution. The numpy reducers are kept as they were so the
    # reported values (e.g. the ddof used for the standard deviation) stay the same.
    age = selected_prot['Age']
    mean_age = np.mean(age)
    d_prot['mean_age'].append(mean_age)
    d_prot['std_age'].append(np.std(age))
    d_prot['min_age'].append(np.amin(age))
    d_prot['max_age'].append(np.amax(age))
    # NOTE(review): 59 is a hard-coded reference age, presumably the cohort
    # average given the column name — confirm and consider a named constant.
    d_prot['above_average'].append(bool(mean_age >= 59))
    for q in (25, 50, 75):
        d_prot[str(q) + '%_age'].append(np.percentile(age, q))

    # Grading distribution: mean, spread and the number of patients per grade 0..4.
    grading = selected_prot['grading']
    mean_grading = np.mean(grading)
    d_prot['mean_grading'].append(mean_grading)
    d_prot['std_grading'].append(np.std(grading))
    for grade in range(5):
        d_prot[str(grade)].append(np.count_nonzero(grading == grade))

    d_prot['score'].append(mean_age * (4 - mean_grading))

    # Most frequent blood group. Series.mode() handles string labels and
    # returns the modes sorted, so ties resolve to the smallest label;
    # scipy.stats.mode no longer accepts non-numeric data and returns a scalar
    # for 1-D input in recent SciPy, which breaks `[0][0]` indexing.
    blood_modes = selected_prot['Blood Group'].mode()
    d_prot['blood_group'].append(blood_modes.iloc[0] if len(blood_modes) else np.nan)

    # Share of patients coded 1 in each comorbidity column. .get(1, 0) yields 0
    # when nobody is coded 1 (instead of raising) and still counts correctly
    # when every patient is coded 1.
    for key, column in coded_columns.items():
        n_affected = selected_prot[column].value_counts().get(1, 0)
        d_prot[key].append(n_affected / n_selected)

    # Share of patients whose entry contains 'yes'; missing values count as no.
    for column in yes_no_columns:
        involved = selected_prot[column].str.contains('yes', case=False, na=False)
        d_prot[column].append(involved.sum() / n_selected)

In [23]:
# One row per protection rule, one column per d_prot key.
description_prot = pd.DataFrame(d_prot)

In [24]:
# Display the protection rules ordered by increasing score.
description_prot.sort_values('score')

Unnamed: 0,lhs,count,support,confidence,mean_age,above_average,std_age,min_age,25%_age,50%_age,...,dyslipidemia,comorbidity_coded,heart,liver,pancreas,Lymphoid System,Proinflammatory Cytokines,Clotting system,kidney,patients
23,"HDLBP_1,STON2_3,RRM2_1_homo,FOXR2_1_hemi",65,0.147453,0.808824,58.338462,False,18.614533,19.0,47.0,57.0,...,0.015385,0.292308,0.169231,0.107692,0.0,0.030769,0.030769,0.030769,0.030769,AR-COV-13 BL54 BS-COV-5 COV-CHIANC-4 VAL-COV-8...
18,"PPM1E_1,DNAH6_1,HOXD4_1,POF1B_1_hemi",68,0.150134,0.811594,57.5,False,18.193001,24.0,43.75,53.5,...,0.014706,0.411765,0.161765,0.205882,0.014706,0.073529,0.044118,0.029412,0.191176,BL58 BS-COV-75 COV-CHIANC-13 COV-CHIANC-14 COV...
7,"NLRP6_1,ITIH2_1,LINS1_3,PNPT1_1",73,0.160858,0.810811,55.561644,False,17.555381,19.0,44.0,52.0,...,0.027397,0.273973,0.123288,0.150685,0.013699,0.068493,0.054795,0.041096,0.082192,BL54 BL58 BS-COV-38 BS-COV-5 COV-CHIANC-6 COV-...
13,"TTLL4_1,NLRP6_1,ITIH2_1,TRIM65_2",70,0.152815,0.802817,57.071429,False,18.921131,18.0,42.25,57.0,...,0.028571,0.328571,0.128571,0.171429,0.014286,0.071429,0.114286,0.028571,0.071429,BS-COV-75 BS-COV-80 BS-COV-99 COV-CHIANC-10 CO...
17,"AURKA_1,HDLBP_1,SHANK3_1,SLC25A5_1_hemi",71,0.150134,0.811594,58.859155,False,17.202437,24.0,49.0,58.0,...,0.014085,0.43662,0.211268,0.112676,0.028169,0.042254,0.056338,0.028169,0.126761,AR-COV-19 BS-COV-38 COV-CHIANC-4 VAL-COV-8 BL1...
22,"PPM1E_1,AKAP10_1,HOXA4_1_homo,PAX4_1_homo",69,0.147453,0.820896,58.028986,False,19.418332,19.0,45.0,56.0,...,0.043478,0.376812,0.188406,0.173913,0.043478,0.057971,0.086957,0.028986,0.173913,BS-COV-80 BS-COV-99 COV-CHIANC-14 VAL-COV-2 SI...
21,"NLRP6_1,SWT1_2,PNPT1_1,HOXA4_1_homo",67,0.147453,0.833333,60.223881,True,20.111496,24.0,44.0,60.0,...,0.059701,0.402985,0.179104,0.208955,0.029851,0.074627,0.089552,0.029851,0.179104,BL54 BL58 BS-COV-108 BS-COV-38 BS-COV-80 BS-CO...
12,"PPM1E_1,NLRP6_1,CPO_1,HOXD4_1",73,0.155496,0.805556,57.232877,False,18.422381,21.0,42.0,57.0,...,0.041096,0.30137,0.205479,0.232877,0.013699,0.068493,0.054795,0.027397,0.136986,AR-COV-19 BL54 BS-COV-5 BS-COV-69 BS-COV-75 BS...
10,"PPM1E_1,N4BP2_1,KRTAP27-1_1,PAX4_1_homo",72,0.158177,0.819444,59.611111,True,19.064624,18.0,45.0,59.5,...,0.013889,0.375,0.236111,0.180556,0.027778,0.097222,0.097222,0.027778,0.166667,BL54 BS-COV-5 BS-COV-75 BS-COV-99 COV-CHIANC-1...
29,"PPM1E_1,SYT16_1,TMEM221_2_homo,TENT5D_1_hemi",65,0.144772,0.80597,58.861538,False,17.02295,24.0,48.0,59.0,...,0.015385,0.415385,0.169231,0.169231,0.0,0.076923,0.061538,0.0,0.092308,AR-COV-19 BL58 BS-COV-75 BS-COV-80 BS-COV-87 C...


In [25]:
# Write the protection summary, including its row index, as a ';'-separated file.
description_prot.to_csv('./males_protection_stats_full.csv', sep=';', index=True)