In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import scipy as sp
import matplotlib.pyplot as plt
import re
from os import listdir
from os.path import isfile, join
from scipy import stats
from matplotlib.ticker import MaxNLocator

import warnings
# NOTE(review): this suppresses every warning category for the rest of the session, so
# pandas/numpy/scipy deprecation or chained-assignment warnings raised by later cells
# will not be shown.
warnings.filterwarnings('ignore')

In [2]:
# Hand-picked identifiers (they look like gene symbols — TODO confirm). The list is not
# referenced by any other cell visible in this notebook.
important = ['EP300','HERC5','TP53','HAPLN3','ALCAM','APOBEC1']

In [3]:
# Load the rule table. The next cell expects the columns 'rules', 'support', 'confidence',
# 'lift', 'count', 'coverage' and 'pvalue_test' to be present.
df = pd.read_csv("./Results/rules_females_full.csv")

In [4]:
# Extract both sides of every rule string:
#   lhs -> text inside braces that are followed by a space
#   rhs -> text inside braces that are preceded by a space
rule_strings = df['rules']
df['lhs'] = rule_strings.map(lambda rule: re.search(r"\{(.+)\} ", rule).group(1))
df['rhs'] = rule_strings.map(lambda rule: re.search(r" \{(.+)\}", rule).group(1))

# Keep only the parsed sides plus the rule metrics (this also discards the raw 'rules'
# column), renumber the rows, strip the '=1' suffixes from the lhs items, store 'count'
# as int32 and order the rules by decreasing support.
column_order = ['lhs', 'rhs', 'support', 'confidence', 'lift', 'count', 'coverage', 'pvalue_test']
df = (
    df[column_order]
    .reset_index(drop=True)
    .assign(
        lhs=lambda rules: rules['lhs'].str.replace('=1', ''),
        count=lambda rules: rules['count'].astype('int32'),
    )
    .sort_values(by=['support'], ascending=False)
)

In [5]:
# Thresholds used further down to select rules whose rhs is 'grading=1':
# a rule is kept when confidence >= minconf_1 and support >= minsup_1.
minconf_1 = 0.8
minsup_1 = 0.08

In [6]:
# Thresholds used further down to select rules whose rhs is 'grading=0':
# a rule is kept when confidence >= minconf_0 and support >= minsup_0.
minconf_0 = 0.8
minsup_0 = 0.08

In [7]:
# Patient-level table (the file name suggests boolean-encoded features — TODO confirm).
# Only its 'grading' column is used by the cells visible in this notebook.
pat = pd.read_csv('./Data/AssociationRules/All_Female_bool.csv')

In [8]:
# Patients whose 'grading' equals 0 (not used again by the cells visible in this notebook).
pat0 = pat[pat['grading']==0]

# Rules that conclude 'grading=0' and reach both the confidence and the support threshold.
# The explicit .copy() makes grading0 an independent frame, so the column assignment
# below is not a chained assignment on a filtered selection of df.
protective_mask = (
    (df['confidence'] >= minconf_0)
    & (df['rhs'] == 'grading=0')
    & (df['support'] >= minsup_0)
)
grading0 = df[protective_mask].copy()

# Swap literal dots for semicolons in the item names. regex=False guarantees that '.' is
# treated as a plain character and never as the "any character" regex wildcard,
# independently of the default of the installed pandas version.
grading0['lhs'] = grading0['lhs'].str.replace('.', ';', regex=False)

# Keep at most the first 24 rules; df was sorted by decreasing support beforehand.
sel_prot = grading0.iloc[0:24]

In [9]:
# Patients whose 'grading' equals 1 (not used again by the cells visible in this notebook).
pat1 = pat[pat['grading']==1]

# Rules that conclude 'grading=1' and reach both the confidence and the support threshold.
# The explicit .copy() makes grading1 an independent frame, so the column assignment
# below is not a chained assignment on a filtered selection of df.
severity_mask = (
    (df['confidence'] >= minconf_1)
    & (df['rhs'] == 'grading=1')
    & (df['support'] >= minsup_1)
)
grading1 = df[severity_mask].copy()

# Swap literal dots for semicolons in the item names. regex=False guarantees that '.' is
# treated as a plain character and never as the "any character" regex wildcard,
# independently of the default of the installed pandas version.
grading1['lhs'] = grading1['lhs'].str.replace('.', ';', regex=False)

# Keep at most the first 24 rules; df was sorted by decreasing support beforehand.
sel_sev = grading1.iloc[0:24]

# Statistical analysis of top severity rules

In [10]:
# Phenotype table (semicolon separated), indexed by PatientID.
pheno = pd.read_csv('./Data/phenotypes_1319.csv', sep=';').set_index('PatientID')

# Rows whose adjusted grading is the *string* '1'.
# NOTE(review): the comparison only matches if the column is read as text — confirm dtype.
is_grade_one = pheno['grading_1319_adj_a_s'] == '1'
pheno1 = pheno[is_grade_one]

# Per-patient item table that still carries a PatientID column; keep grading '1' rows only
# (again a string comparison).
pat_index = pd.read_csv('./Data/AssociationRules/All_Female_bool_index.csv')
pat_index_1 = pat_index.loc[pat_index['grading'] == '1']

In [11]:
# Replace the verbose phenotype headers (name plus coding legend) with short names.
short_names = {
    'diabetes (1=affected, 0=not affected, N/A= data not available)': 'Diabetes',
    'hypertension (1=affected, 0=not affected, N/A= data not available)': 'Hypertension',
    'cancer  (1=affected, 0=not affected, N/A= data not available)': 'Cancer',
    'Autoimmune disease (1 = Yes, 0 = No)': 'Autoimmune disease',
    'Congestive/Ischemic Heart Failure (1= Yes; 0=No)': 'Congestive/Ischemic Heart Failure',
    'Obesity (1= Yes; 0=No)': 'Obesity',
    'Dyslipidemia (1= Yes; 0=No)': 'Dyslipidemia',
    'Hypothyroidism (1= Yes; 0=No)': 'Hypothyroidism',
    'Asthma/COPD/OSAS (1= Yes; 0=No)': 'Asthma/COPD/OSAS',
    'comorbidity_coded (1 = any comorbidity, 0 = none)': 'comorbidity_coded',
}
pheno1 = pheno1.rename(columns=short_names)

In [12]:
# For every selected severity rule, find the rows of pat_index_1 that satisfy it.
#   genes[k]    -> column names listed on the rule's left-hand side (comma separated)
#   series[k]   -> boolean Series named 'bools': True where all those columns equal 1
#   patients[k] -> PatientID values of the rows flagged in series[k]
series, genes, patients = [], [], []

for antecedent in sel_sev['lhs']:
    rule_columns = antecedent.split(',')
    all_present = pd.Series((pat_index_1[rule_columns].values == 1).all(axis=1), name='bools')

    genes.append(rule_columns)
    series.append(all_present)
    patients.append(pat_index_1[all_present.values].PatientID.values)

In [13]:
# Accumulator for the severity summary: one independent empty list per output column.
# The order of the names below is the column order of the resulting table.
d_sev = {
    column: []
    for column in (
        # rule identity and metrics
        'lhs', 'count', 'support', 'confidence',
        # age distribution of the matching patients
        'mean_age', 'below_average', 'std_age', 'min_age',
        '25%_age', '50%_age', '75%_age', 'max_age',
        # grading distribution: summary and number of patients per grading value
        'mean_grading', 'std_grading', '0', '1', '2', '3', '4',
        'score',
        # phenotype summaries
        'blood_group', 'diabetes', 'hypertension', 'cancer', 'autoimmune_disease',
        'congestive_ischemic_heart_failure', 'asthma', 'hypothyroidism', 'obesity',
        'dyslipidemia', 'comorbidity_coded',
        # share of patients flagged per organ/system column
        'heart', 'liver', 'pancreas', 'Lymphoid System', 'Proinflammatory Cytokines',
        'Clotting system', 'kidney',
        # space-separated patient identifiers
        'patients',
    )
}

In [14]:
# Append one entry per selected severity rule to every list of d_sev.
# NOTE(review): `patients` must come from the severity lookup cell; the protection section
# rebinds the same name, so re-run that cell first when executing cells out of order.

# d_sev key -> 0/1-coded phenotype column whose share of positive patients is reported.
binary_columns = {
    'diabetes': 'Diabetes',
    'hypertension': 'Hypertension',
    'cancer': 'Cancer',
    'autoimmune_disease': 'Autoimmune disease',
    'congestive_ischemic_heart_failure': 'Congestive/Ischemic Heart Failure',
    'asthma': 'Asthma/COPD/OSAS',
    'hypothyroidism': 'Hypothyroidism',
    'dyslipidemia': 'Dyslipidemia',
    'obesity': 'Obesity',
    'comorbidity_coded': 'comorbidity_coded',
}
# Text columns in which a cell containing "yes" (any case) marks the patient as affected.
# The d_sev key is identical to the column name.
yes_columns = ['heart', 'liver', 'Proinflammatory Cytokines', 'pancreas', 'kidney',
               'Lymphoid System', 'Clotting system']

for i in range(len(sel_sev)):
    rule = sel_sev.iloc[i]
    d_sev['lhs'].append(rule['lhs'])
    d_sev['support'].append(rule['support'])
    d_sev['confidence'].append(rule['confidence'])
    d_sev['patients'].append(' '.join(patients[i]))

    # Phenotype rows of the patients matching this rule (pheno1 is indexed by PatientID).
    selected_sev = pheno1.loc[patients[i]]
    n_selected = len(selected_sev)
    d_sev['count'].append(n_selected)

    # Age distribution.
    age = selected_sev['Age']
    mean_age = np.mean(age)
    d_sev['mean_age'].append(mean_age)
    d_sev['std_age'].append(np.std(age))
    d_sev['min_age'].append(np.amin(age))
    d_sev['max_age'].append(np.amax(age))
    # 59 is a hard-coded age cut-off (presumably the cohort's average age — TODO confirm).
    d_sev['below_average'].append(bool(mean_age < 59))
    d_sev['25%_age'].append(np.percentile(age, 25))
    d_sev['50%_age'].append(np.percentile(age, 50))
    d_sev['75%_age'].append(np.percentile(age, 75))

    # Grading distribution: summary statistics plus the number of patients per value 0..4.
    grading = selected_sev['grading']
    mean_grading = np.mean(grading)
    d_sev['mean_grading'].append(mean_grading)
    d_sev['std_grading'].append(np.std(grading))
    for level in range(5):
        d_sev[str(level)].append(np.count_nonzero(grading == level))

    # Score = mean age multiplied by (4 - mean grading).
    d_sev['score'].append(mean_age * (4 - mean_grading))

    # Most frequent blood group. Series.mode() returns the modes sorted, so ties resolve to
    # the smallest value; it also works for text values and on SciPy releases where
    # scipy.stats.mode no longer accepts non-numeric input or returns an indexable array.
    blood_modes = selected_sev['Blood Group'].mode()
    d_sev['blood_group'].append(blood_modes.iloc[0] if len(blood_modes) else np.nan)

    # Share of patients coded 1 in each binary column. Counting matches directly yields 0
    # when nobody is positive (value_counts()[1] raised KeyError) and the correct share
    # when everybody is positive (a "more than one distinct value" guard reported 0).
    for key, column in binary_columns.items():
        coded = pd.to_numeric(selected_sev[column], errors='coerce')
        d_sev[key].append((coded == 1).sum() / n_selected)

    # Share of patients whose organ/system column contains "yes"; missing cells count as no.
    for column in yes_columns:
        affected = selected_sev[column].str.contains('yes', case=False, na=False)
        d_sev[column].append(affected.sum() / n_selected)

In [15]:
# Turn the per-rule lists into a table: one row per rule, one column per d_sev key.
# All lists must have the same length for this to succeed.
description_sev = pd.DataFrame(data=d_sev)

In [16]:
# Display the rules ordered by increasing score. The sorted copy is not assigned, so
# description_sev itself keeps its original row order.
description_sev.sort_values(by=['score'], ascending=True)

Unnamed: 0,lhs,count,support,confidence,mean_age,below_average,std_age,min_age,25%_age,50%_age,...,dyslipidemia,comorbidity_coded,heart,liver,pancreas,Lymphoid System,Proinflammatory Cytokines,Clotting system,kidney,patients
20,"ALK_3,IBSP_4,C1orf109_1",36,0.099206,0.833333,57.361111,True,13.752076,29.0,47.0,58.0,...,0.055556,0.472222,0.333333,0.388889,0.194444,0.194444,0.305556,0.111111,0.166667,AR-COV-11 AR-COV-15 BL70 BS-COV-31 TV-COV-2 SP...
11,"FABP1_1,C1orf109_1,TMEM159_1_homo,MTHFD1_1_homo",35,0.107143,0.9,56.457143,True,14.697605,25.0,48.0,59.0,...,0.028571,0.514286,0.228571,0.342857,0.085714,0.114286,0.2,0.057143,0.114286,AR-COV-15 BS-COV-11 BS-COV-22 BS-COV-31 BS-COV...
17,"EP300_1,PPP1R9A_1,EPCAM_1,MTHFD1_1_homo",35,0.103175,0.866667,60.914286,False,16.303149,25.0,49.0,59.0,...,0.085714,0.628571,0.228571,0.4,0.171429,0.085714,0.228571,0.142857,0.171429,AR-COV-23 AR-COV-2 AR-COV-9 BS-COV-31 BS-COV-7...
10,"EP300_1,PPP1R9A_1,EPCAM_1,IL17RC_1_homo",35,0.107143,0.818182,57.371429,True,17.289279,19.0,46.0,57.0,...,0.028571,0.6,0.228571,0.371429,0.142857,0.085714,0.171429,0.114286,0.228571,AR-COV-2 AR-COV-9 BS-COV-31 BS-COV-71 BS-COV-9...
12,"EP300_1,FABP1_1,RMI1_1_homo",38,0.107143,0.818182,60.184211,False,16.373688,19.0,48.0,59.0,...,0.026316,0.578947,0.263158,0.368421,0.131579,0.0,0.105263,0.078947,0.157895,AR-COV-11 AR-COV-1 BL70 BS-COV-31 BS-COV-50 BS...
22,"HERC5_1,CD52_1_homo,CILP_3_homo",36,0.099206,0.833333,56.416667,True,16.51157,25.0,46.75,54.5,...,0.0,0.555556,0.305556,0.388889,0.083333,0.111111,0.194444,0.027778,0.138889,AR-COV-1 AR-COV-9 BL70 BS-COV-27 BS-COV-57 BS-...
19,"PDE11A_2,PTPRU_1,FABP1_1,MTHFD1_1_homo",35,0.103175,0.8125,57.742857,True,11.743309,38.0,50.5,58.0,...,0.0,0.571429,0.228571,0.371429,0.114286,0.028571,0.142857,0.085714,0.142857,AR-COV-11 AR-COV-15 AR-COV-4 BS-COV-11 BS-COV-...
14,"IRX5_1,PPP1R9A_1,EPCAM_1,TP53_1_homo",36,0.103175,0.83871,60.805556,False,14.498696,19.0,52.0,58.5,...,0.055556,0.555556,0.277778,0.5,0.083333,0.166667,0.25,0.138889,0.166667,AR-COV-4 BS-COV-32 BS-COV-59 SM-COV-30 LS-COV-...
3,"IRX5_1,PPP1R9A_1,C1orf109_1,MTHFD1_1_homo",41,0.119048,0.810811,59.195122,False,12.599302,34.0,50.0,58.0,...,0.02439,0.463415,0.195122,0.317073,0.146341,0.097561,0.170732,0.097561,0.195122,AR-COV-11 AR-COV-2 AR-COV-4 BS-COV-11 BS-COV-3...
18,"PDE11A_2,FABP1_1,APOBEC1_1_homo,CCDC61_1_homo,...",34,0.103175,0.8125,56.5,True,13.543503,25.0,47.0,57.5,...,0.058824,0.558824,0.264706,0.323529,0.058824,0.058824,0.235294,0.058824,0.088235,AR-COV-11 AR-COV-15 BS-COV-11 BS-COV-31 BS-COV...


In [17]:
# Export the (unsorted) severity table as a semicolon-separated file, index included.
description_sev.to_csv('./females_severity_stats_full.csv',index=True, sep=';')

# Statistical analysis of top protection rules

In [18]:
# Phenotype table (semicolon separated), read again from disk and indexed by PatientID.
pheno = pd.read_csv('./Data/phenotypes_1319.csv', sep=';').set_index('PatientID')

# Rows whose adjusted grading is the *string* '0'.
# NOTE(review): the comparison only matches if the column is read as text — confirm dtype.
is_grade_zero = pheno['grading_1319_adj_a_s'] == '0'
pheno0 = pheno[is_grade_zero]

# Per-patient item table that still carries a PatientID column; keep grading '0' rows only
# (again a string comparison).
pat_index = pd.read_csv('./Data/AssociationRules/All_Female_bool_index.csv')
pat_index_0 = pat_index.loc[pat_index['grading'] == '0']

In [19]:
# Replace the verbose phenotype headers (name plus coding legend) with short names.
short_names = {
    'diabetes (1=affected, 0=not affected, N/A= data not available)': 'Diabetes',
    'hypertension (1=affected, 0=not affected, N/A= data not available)': 'Hypertension',
    'cancer  (1=affected, 0=not affected, N/A= data not available)': 'Cancer',
    'Autoimmune disease (1 = Yes, 0 = No)': 'Autoimmune disease',
    'Congestive/Ischemic Heart Failure (1= Yes; 0=No)': 'Congestive/Ischemic Heart Failure',
    'Obesity (1= Yes; 0=No)': 'Obesity',
    'Dyslipidemia (1= Yes; 0=No)': 'Dyslipidemia',
    'Hypothyroidism (1= Yes; 0=No)': 'Hypothyroidism',
    'Asthma/COPD/OSAS (1= Yes; 0=No)': 'Asthma/COPD/OSAS',
    'comorbidity_coded (1 = any comorbidity, 0 = none)': 'comorbidity_coded',
}
pheno0 = pheno0.rename(columns=short_names)

In [20]:
# For every selected protection rule, find the rows of pat_index_0 that satisfy it.
#   genes[k]    -> column names listed on the rule's left-hand side (comma separated)
#   series[k]   -> boolean Series named 'bools': True where all those columns equal 1
#   patients[k] -> PatientID values of the rows flagged in series[k]
# These three names are rebound here and replace the lists built for the severity rules.
series, genes, patients = [], [], []

for antecedent in sel_prot['lhs']:
    rule_columns = antecedent.split(',')
    all_present = pd.Series((pat_index_0[rule_columns].values == 1).all(axis=1), name='bools')

    genes.append(rule_columns)
    series.append(all_present)
    patients.append(pat_index_0[all_present.values].PatientID.values)

In [21]:
# Accumulator for the protection summary: one independent empty list per output column.
# The order of the names below is the column order of the resulting table.
d_prot = {
    column: []
    for column in (
        # rule identity and metrics
        'lhs', 'count', 'support', 'confidence',
        # age distribution of the matching patients
        'mean_age', 'above_average', 'std_age', 'min_age',
        '25%_age', '50%_age', '75%_age', 'max_age',
        # grading distribution: summary and number of patients per grading value
        'mean_grading', 'std_grading', '0', '1', '2', '3', '4',
        'score',
        # phenotype summaries
        'blood_group', 'diabetes', 'hypertension', 'cancer', 'autoimmune_disease',
        'congestive_ischemic_heart_failure', 'asthma', 'hypothyroidism', 'obesity',
        'dyslipidemia', 'comorbidity_coded',
        # share of patients flagged per organ/system column
        'heart', 'liver', 'pancreas', 'Lymphoid System', 'Proinflammatory Cytokines',
        'Clotting system', 'kidney',
        # space-separated patient identifiers
        'patients',
    )
}

In [22]:
# Append one entry per selected protection rule to every list of d_prot.
# NOTE(review): `patients` must come from the protection lookup cell; the severity section
# binds the same name, so re-run that cell first when executing cells out of order.

# d_prot key -> 0/1-coded phenotype column whose share of positive patients is reported.
# Driving all columns from one mapping guarantees that each share is appended to its own
# list (a hand-written hypertension fallback used to append to the 'cancer' list instead).
binary_columns = {
    'diabetes': 'Diabetes',
    'hypertension': 'Hypertension',
    'cancer': 'Cancer',
    'autoimmune_disease': 'Autoimmune disease',
    'congestive_ischemic_heart_failure': 'Congestive/Ischemic Heart Failure',
    'asthma': 'Asthma/COPD/OSAS',
    'hypothyroidism': 'Hypothyroidism',
    'dyslipidemia': 'Dyslipidemia',
    'obesity': 'Obesity',
    'comorbidity_coded': 'comorbidity_coded',
}
# Text columns in which a cell containing "yes" (any case) marks the patient as affected.
# The d_prot key is identical to the column name.
yes_columns = ['heart', 'liver', 'Proinflammatory Cytokines', 'pancreas', 'kidney',
               'Lymphoid System', 'Clotting system']

for i in range(len(sel_prot)):
    rule = sel_prot.iloc[i]
    d_prot['lhs'].append(rule['lhs'])
    d_prot['support'].append(rule['support'])
    d_prot['confidence'].append(rule['confidence'])
    d_prot['patients'].append(' '.join(patients[i]))

    # Phenotype rows of the patients matching this rule (pheno0 is indexed by PatientID).
    selected_prot = pheno0.loc[patients[i]]
    n_selected = len(selected_prot)
    d_prot['count'].append(n_selected)

    # Age distribution.
    age = selected_prot['Age']
    mean_age = np.mean(age)
    d_prot['mean_age'].append(mean_age)
    d_prot['std_age'].append(np.std(age))
    d_prot['min_age'].append(np.amin(age))
    d_prot['max_age'].append(np.amax(age))
    # 59 is a hard-coded age cut-off (presumably the cohort's average age — TODO confirm).
    d_prot['above_average'].append(bool(mean_age >= 59))
    d_prot['25%_age'].append(np.percentile(age, 25))
    d_prot['50%_age'].append(np.percentile(age, 50))
    d_prot['75%_age'].append(np.percentile(age, 75))

    # Grading distribution: summary statistics plus the number of patients per value 0..4.
    grading = selected_prot['grading']
    mean_grading = np.mean(grading)
    d_prot['mean_grading'].append(mean_grading)
    d_prot['std_grading'].append(np.std(grading))
    for level in range(5):
        d_prot[str(level)].append(np.count_nonzero(grading == level))

    # Score = mean age multiplied by (4 - mean grading).
    d_prot['score'].append(mean_age * (4 - mean_grading))

    # Most frequent blood group. Series.mode() returns the modes sorted, so ties resolve to
    # the smallest value; it also works for text values and on SciPy releases where
    # scipy.stats.mode no longer accepts non-numeric input or returns an indexable array.
    blood_modes = selected_prot['Blood Group'].mode()
    d_prot['blood_group'].append(blood_modes.iloc[0] if len(blood_modes) else np.nan)

    # Share of patients coded 1 in each binary column. Counting matches directly yields 0
    # when nobody is positive (value_counts()[1] raised KeyError) and the correct share
    # when everybody is positive (a "more than one distinct value" guard reported 0).
    for key, column in binary_columns.items():
        coded = pd.to_numeric(selected_prot[column], errors='coerce')
        d_prot[key].append((coded == 1).sum() / n_selected)

    # Share of patients whose organ/system column contains "yes"; missing cells count as no.
    for column in yes_columns:
        affected = selected_prot[column].str.contains('yes', case=False, na=False)
        d_prot[column].append(affected.sum() / n_selected)

In [23]:
# Turn the per-rule lists into a table: one row per rule, one column per d_prot key.
# All lists must have the same length for this to succeed.
description_prot = pd.DataFrame(data=d_prot)

In [24]:
# Display the rules ordered by increasing score. The sorted copy is not assigned, so
# description_prot itself keeps its original row order.
description_prot.sort_values(by=['score'], ascending=True)

Unnamed: 0,lhs,count,support,confidence,mean_age,above_average,std_age,min_age,25%_age,50%_age,...,dyslipidemia,comorbidity_coded,heart,liver,pancreas,Lymphoid System,Proinflammatory Cytokines,Clotting system,kidney,patients
8,"LSR_1,GJE1_1,RBM11_1_homo,APOBEC1_1_homo",31,0.095238,0.8,59.096774,True,11.737326,47.0,50.5,58.0,...,0.032258,0.483871,0.032258,0.032258,0.032258,0.032258,0.0,0.032258,0.064516,BL52 BS-COV-104 BS-COV-105 BL104 SI-COV-30 RUF...
18,"LSR_1,HAPLN3_2,TMEM40_2,RBM11_1_homo,POLI_1_homo",29,0.087302,0.846154,58.241379,False,12.604474,47.0,49.0,54.0,...,0.068966,0.448276,0.068966,0.034483,0.0,0.0,0.0,0.0,0.034483,VAL-COV-9 VAL-COV-10 BL104 RUF-COV18 CR-COV-48...
14,"LSR_1,GSTM3_1,GJE1_1,APOBEC1_1_homo",28,0.087302,0.814815,61.071429,True,14.461596,47.0,49.75,58.5,...,0.0,0.464286,0.035714,0.0,0.071429,0.035714,0.0,0.071429,0.071429,BL52 VAL-COV-11 BL104 SI-COV-30 RUF-COV16 CR-C...
3,"RNF149_1,TMEM40_2,RBM11_1_homo,POLI_1_homo,APO...",34,0.107143,0.818182,61.588235,True,13.631964,48.0,50.0,58.0,...,0.0,0.470588,0.147059,0.029412,0.0,0.029412,0.0,0.029412,0.117647,VAL-COV-1 VAL-COV-9 VAL-COV-10 RUF-COV18 RUF-C...
9,"LSR_1,NCOA3_1,TMEM40_2,RBM11_1_homo,APOBEC1_1_...",29,0.095238,0.827586,60.551724,True,13.291393,47.0,50.0,58.0,...,0.034483,0.344828,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,BS-COV-105 VAL-COV-3 BL104 RUF-COV17 RUF-COV18...
17,"LSR_1,TMEM40_2,RBM11_1_homo,APOBEC1_1_homo,COQ...",31,0.087302,0.814815,63.096774,True,15.631554,48.0,49.5,58.0,...,0.032258,0.419355,0.064516,0.032258,0.032258,0.064516,0.032258,0.0,0.096774,VAL-COV-3 TV-COV-102 SI-COV-30 RUF-COV17 RUF-C...
12,"LSR_1,SLC24A3_1,TMEM40_2,RBM11_1_homo,CFHR4_1_...",29,0.09127,0.821429,64.37931,True,14.101635,47.0,54.0,59.0,...,0.068966,0.37931,0.034483,0.0,0.0,0.034483,0.034483,0.034483,0.068966,BS-COV-105 VAL-COV-3 TV-COV-102 BL104 CR-COV-4...
7,"LSR_1,IGFBP2_1,RBM11_1_homo,APOBEC1_1_homo,COQ...",31,0.095238,0.857143,63.290323,True,15.452724,48.0,51.0,59.0,...,0.032258,0.354839,0.032258,0.0,0.032258,0.032258,0.032258,0.0,0.064516,VAL-COV-3 TV-COV-102 SI-COV-30 BL116 RUF-COV17...
2,"LSR_1,TMEM40_2,RBM11_1_homo,CFHR4_1_homo,APOBE...",35,0.107143,0.818182,63.857143,True,14.404648,47.0,52.5,59.0,...,0.057143,0.371429,0.057143,0.0,0.0,0.028571,0.028571,0.028571,0.057143,BS-COV-105 VAL-COV-3 VAL-COV-9 TV-COV-102 BL10...
0,"LSR_1,MYO1H_1,POLI_1_homo,APOBEC1_1_homo",39,0.115079,0.805556,63.076923,True,14.287137,47.0,53.0,59.0,...,0.0,0.487179,0.076923,0.051282,0.0,0.0,0.051282,0.025641,0.076923,BL53 BS-COV-101 COV-CHIANC-5 VAL-COV-10 BL104 ...


In [25]:
# Export the (unsorted) protection table as a semicolon-separated file, index included.
description_prot.to_csv('./females_protection_stats_full.csv',index=True, sep=';')