In [1]:
import os 
import pandas as pd

from pandas_profiling import ProfileReport
from pandas_profiling.utils.cache import cache_file
from collections import Counter
import seaborn as sn


import numpy as np

In [2]:
box_file_dir = os.path.join(os.getcwd(), "..", "..", "Box")
file_path_csv = os.path.join(box_file_dir, "covid_pts_enc_level_labs_dx_2021-02-02_deid.csv")

In [3]:
df = pd.read_csv(file_path_csv, index_col=False)

In [4]:
df['zip_cust_table'].dtypes

dtype('O')

In [5]:
def latinx(row):
    if row.ethnicity_display == 'Hispanic or Latino' and row.race_display == 'White':
        return "Hispanic"
    elif row.ethnicity_display == 'Not Hispanic or Latino' and row.race_display == 'White': 
        return "White"
    else:
        return row.race_display

In [6]:
df.dtypes

admit_dt_tm                object
discharge_dt_tm            object
facility                   object
encounter_type_display     object
age_at_admit                int64
                           ...   
cancer_com_flag            object
qSOFA_score               float64
deid_empi_encounter        object
deid_mrn_encounter          int64
deid_fin_encounter          int64
Length: 70, dtype: object

In [7]:
df['race_display'] = df.apply(lambda row: latinx(row), axis=1)

In [8]:
zip_df = pd.read_csv('zip_code_data.csv', index_col=False)

In [9]:
zip_df['zip_code'] = zip_df.zip_code.apply(lambda x: x.strip('ZCTA5 '))

In [10]:
zip_df.head()

Unnamed: 0,zip_code,male_poverty_rate,female_poverty_rate,median_income,mean_income,poverty_rate
0,75001,3.8,11.4,72512,87472,10.9
1,75002,1.4,5.2,101429,125779,4.9
2,75006,2.1,12.3,64462,85788,10.8
3,75007,1.8,6.6,86648,102783,6.7
4,75009,1.2,6.1,111146,144513,4.0


In [11]:
df.columns

Index(['admit_dt_tm', 'discharge_dt_tm', 'facility', 'encounter_type_display',
       'age_at_admit', 'gender', 'zip_cust_table', 'ethnicity_display',
       'race_display', 'race_eth_comb', 'length_of_stay', 'icd_prim',
       'deceased_dt_tm', 'discharge_disposition_display', 'dc_dispo_cat',
       'insured_uninsured', 'admit_source', 'arrival_mode',
       'vent_hours_summed', 'vent_first_order_start_time',
       'vent_last_order_end_time', 'icu_hours_summed', 'icu_first_begin_time',
       'icu_last_end_time', 'occupation', 'occ_cat', 'emp_yn', 'Creatinine',
       'Abs_Lymph', 'ALT', 'AST', 'Bili_Total', 'D-Dimer',
       'Diastolic_Blood_Pressure', 'Dosing_Weight_(kg)', 'FiO2/Percent',
       'Glasgow_Coma_Score', 'Lymph_Auto', 'Oxygen_Delivery_Method',
       'Oxygen_Saturation', 'Platelet', 'pO2_Art', 'Procalcitonin',
       'Respiratory_Rate', 'Systolic_Blood_Pressure', 'Troponin-I', 'WBC',
       'Oxygen_Flow_Rate', 'Patient_Smoking_Status', 'Visit_Reason',
       'Height_(i

In [12]:
icu_df = df[~df['icu_hours_summed'].isnull()]

In [13]:
icu_df= icu_df[~icu_df['qSOFA_score'].isnull()]

In [14]:
len(icu_df)

716

In [15]:
demo_df = icu_df[['zip_cust_table', 'ethnicity_display', 'discharge_disposition_display', 'race_display', 'race_eth_comb', 'heart_pulse_comb', 'temp_oral_degC', 'calc_pulled_BMI', 'no_dx_data',
       'COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag', 'qSOFA_score' ]]

In [16]:
all_df = pd.merge(demo_df, zip_df, left_on='zip_cust_table', right_on='zip_code', how='inner')

In [17]:
len(all_df['zip_cust_table'].unique())

97

In [18]:
all_df.columns

Index(['zip_cust_table', 'ethnicity_display', 'discharge_disposition_display',
       'race_display', 'race_eth_comb', 'heart_pulse_comb', 'temp_oral_degC',
       'calc_pulled_BMI', 'no_dx_data', 'COPD_com_flag', 'asthma_com_flag',
       'diabetes_com_flag', 'hypertension_com_flag', 'CAD_com_flag',
       'heartfailure_com_flag', 'CKD_com_flag', 'cancer_com_flag',
       'qSOFA_score', 'zip_code', 'male_poverty_rate', 'female_poverty_rate',
       'median_income', 'mean_income', 'poverty_rate'],
      dtype='object')

In [19]:
all_df[['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag']] = all_df[['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag']].fillna(0)

In [20]:
all_df[['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag']] = all_df[['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag']].astype(int)

In [21]:
def comorbidity_count(row):
    count = 0 
    
    if row.COPD_com_flag == 1:
        count += 1 
        
    if row.asthma_com_flag == 1:
        count += 1 
        
    if row.diabetes_com_flag == 1:
        count += 1 
        
    if row.hypertension_com_flag == 1:
        count += 1 
        
    if row.CAD_com_flag == 1:
        count += 1 
        
    if row.heartfailure_com_flag == 1: 
        count += 1 
        
    if row.CKD_com_flag == 1:
        count += 1 
        
    if row.cancer_com_flag == 1:
        count += 1 
        
    return count

In [22]:
all_df['total_comorbidities'] = all_df.apply(lambda row: comorbidity_count(row), axis=1)

In [23]:
all_df

Unnamed: 0,zip_cust_table,ethnicity_display,discharge_disposition_display,race_display,race_eth_comb,heart_pulse_comb,temp_oral_degC,calc_pulled_BMI,no_dx_data,COPD_com_flag,...,CKD_com_flag,cancer_com_flag,qSOFA_score,zip_code,male_poverty_rate,female_poverty_rate,median_income,mean_income,poverty_rate,total_comorbidities
0,78130,Hispanic or Latino,Expired,Hispanic,White Hispanic,97.0,,30.620000,False,0,...,0,0,3.0,78130,1.5,10,65627,84515,9.5,0
1,78130,Hispanic or Latino,Rehab Care,Hispanic,White Hispanic,101.0,37.2,27.760000,False,0,...,1,0,2.0,78130,1.5,10,65627,84515,9.5,2
2,78130,Hispanic or Latino,Skilled Nursing Care,Hispanic,White Hispanic,98.0,36.9,,False,0,...,0,0,1.0,78130,1.5,10,65627,84515,9.5,3
3,78130,Unknown,Still a patient,White,White Unknown Ethnicity,98.0,36.7,,False,0,...,0,0,0.0,78130,1.5,10,65627,84515,9.5,0
4,78612,Not Hispanic or Latino,Home,White,White Non Hispanic,99.0,37.4,44.890000,False,0,...,0,0,1.0,78612,6.3,10.9,69213,80458,10.6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647,78940,Not Hispanic or Latino,Home,Black or African American,Other race Non Hispanic,66.0,36.8,32.830000,False,0,...,0,0,1.0,78940,5,14.3,57500,80110,12,2
648,79762,Hispanic or Latino,Home,Other Race,Other Race Hispanic,91.0,36.9,49.153355,False,0,...,0,0,0.0,79762,2,9.5,66584,88737,9.1,0
649,78132,Not Hispanic or Latino,Home,White,White Non Hispanic,87.0,37.4,19.280000,False,0,...,0,0,1.0,78132,2,4.2,106523,135706,3.9,0
650,76527,Unknown,Home,White,White Unknown Ethnicity,88.0,37.7,33.163386,False,0,...,0,0,1.0,76527,6.2,14,54940,80699,13.6,3


In [24]:
#zip_df['median_income'] = zip_df['median_income'].apply(lambda x: x.replace('-','00').replace('+', ''))

In [25]:
#zip_df['median_income'] = zip_df['median_income'].astype(int)

In [26]:
out_df = all_df.value_counts(subset=['zip_cust_table', 'total_comorbidities'], sort=True).reset_index().rename(columns={0: 'count'})
new_out_df = pd.merge(out_df, zip_df[['zip_code','median_income', 'mean_income', 'poverty_rate']], left_on='zip_cust_table', right_on='zip_code', how='inner')

new_out_df = new_out_df[['zip_cust_table', 'total_comorbidities', 'count', 'median_income',
       'mean_income', 'poverty_rate']].astype(float)

new_out_df['below_state_median_income'] = new_out_df.median_income.apply(lambda x: 'yes' if x < 64034 else 'no')

new_out_df.sort_values(by=['total_comorbidities']).to_csv('tot_com_zip_dist.csv')
new_out_df.sort_values(by=['total_comorbidities', 'count'], ascending=[False, False]).to_csv('tot_com_zip_dist_high.csv')
new_out_df.sort_values(by=['median_income', 'total_comorbidities','count'], ascending=[True, False, False]).to_csv('tot_com_zip_dist_income.csv')



In [27]:
out_df = all_df.groupby(['race_display'])['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag'].apply(lambda x : x.astype(int).sum()).reset_index()
#new_out_df = pd.merge(out_df, zip_df[['zip_code','median_income', 'mean_income', 'poverty_rate']], left_on='zip_cust_table', right_on='zip_code', how='inner')

#new_out_df = new_out_df[['zip_cust_table', 'total_comorbidities', 'count', 'median_income',
#       'mean_income', 'poverty_rate']].astype(float)

#new_out_df['below_state_median_income'] = new_out_df.median_income.apply(lambda x: 'yes' if x < 64034 else 'no')

out_df.to_csv('com_race.csv')
#new_out_df.sort_values(by=['total_comorbidities', 'count'], ascending=[False, False]).to_csv('tot_com_zip_dist_high.csv')
#new_out_df.sort_values(by=['median_income', 'total_comorbidities','count'], ascending=[True, False, False]).to_csv('tot_com_zip_dist_income.csv')


  """Entry point for launching an IPython kernel.


In [28]:
# number of patients reported comorbidity 

In [50]:
all_df['has_comorbidity'] = all_df.total_comorbidities.apply(lambda x: 1 if x >= 1 else 0)
all_df['has_comorbidity2'] = all_df.total_comorbidities.apply(lambda x: 1 if x >= 2 else 0)
all_df['death'] = all_df.discharge_disposition_display.apply(lambda x: 1 if x == 'Expired' else 0)

In [51]:
c = Counter(all_df['race_display'])

In [52]:
c

Counter({'Hispanic': 331,
         'White': 216,
         'Other Race': 25,
         'Decline to Specify': 4,
         'Black or African American': 51,
         'American Indian/Alaska Native': 4,
         'Asian': 5,
         'Unknown': 15,
         'Native Hawaiian or Other Pacific Islande': 1})

In [53]:
race_df = pd.DataFrame(c.items(), columns=['race_display', 'number_of_patients'])

In [54]:
race_df

Unnamed: 0,race_display,number_of_patients
0,Hispanic,331
1,White,216
2,Other Race,25
3,Decline to Specify,4
4,Black or African American,51
5,American Indian/Alaska Native,4
6,Asian,5
7,Unknown,15
8,Native Hawaiian or Other Pacific Islande,1


In [55]:
race_df['number_of_patients'].sum()

652

In [56]:
death_df = all_df.groupby(['race_display'])['death'].apply(lambda x : x.astype(int).sum()).reset_index()

In [36]:
death_df

Unnamed: 0,race_display,death
0,American Indian/Alaska Native,1
1,Asian,3
2,Black or African American,15
3,Decline to Specify,3
4,Hispanic,76
5,Native Hawaiian or Other Pacific Islande,0
6,Other Race,6
7,Unknown,7
8,White,37


In [57]:
como_df = all_df.groupby(['race_display'])['has_comorbidity'].apply(lambda x : x.astype(int).sum()).reset_index()

In [58]:
como_race_df = pd.merge(como_df, race_df)

In [59]:
como_race_df['percentage_with_como'] = como_race_df.apply(lambda row: (row.has_comorbidity / row.number_of_patients) * 100, axis=1)

In [60]:
como_df2 = all_df.groupby(['race_display'])['has_comorbidity2'].apply(lambda x : x.astype(int).sum()).reset_index()
como_race_df2 = pd.merge(como_df2, race_df)
como_race_df2['percentage_with_como'] = como_race_df2.apply(lambda row: (row.has_comorbidity2 / row.number_of_patients) * 100, axis=1)

In [61]:
death_race_df = pd.merge(death_df, race_df)

In [62]:
death_race_df['percentage_death'] = death_race_df.apply(lambda row: (row.death / row.number_of_patients) * 100, axis=1)

In [63]:
death_race_df = death_race_df.sort_values(by=['percentage_death'], ascending = False)

In [64]:
death_race_df.columns

Index(['race_display', 'death', 'number_of_patients', 'percentage_death'], dtype='object')

In [65]:
death_race_df[['race_display', 'death', 'number_of_patients',
       'percentage_death']]

Unnamed: 0,race_display,death,number_of_patients,percentage_death
3,Decline to Specify,3,4,75.0
1,Asian,3,5,60.0
7,Unknown,7,15,46.666667
2,Black or African American,15,51,29.411765
0,American Indian/Alaska Native,1,4,25.0
6,Other Race,6,25,24.0
4,Hispanic,76,331,22.960725
8,White,37,216,17.12963
5,Native Hawaiian or Other Pacific Islande,0,1,0.0


In [66]:
como_race_df = como_race_df.sort_values(by=['percentage_with_como'], ascending = False)

In [67]:
como_race_df2 = como_race_df2.sort_values(by=['percentage_with_como'], ascending = False)

In [46]:
# From Min statistically test means of the groups 
# feature engineer race and ethnicty for Latinx (phenotypically shown white)
# test difference for white hispanic and non white hispanic 
# stats testing should showcase the differences (or not) as the data is large to just observe
# qualitatively 


In [68]:
como_race_df

Unnamed: 0,race_display,has_comorbidity,number_of_patients,percentage_with_como
1,Asian,5,5,100.0
2,Black or African American,46,51,90.196078
7,Unknown,13,15,86.666667
6,Other Race,21,25,84.0
8,White,169,216,78.240741
4,Hispanic,253,331,76.435045
0,American Indian/Alaska Native,3,4,75.0
3,Decline to Specify,3,4,75.0
5,Native Hawaiian or Other Pacific Islande,0,1,0.0


In [69]:
como_race_df2

Unnamed: 0,race_display,has_comorbidity2,number_of_patients,percentage_with_como
2,Black or African American,36,51,70.588235
1,Asian,3,5,60.0
7,Unknown,9,15,60.0
8,White,120,216,55.555556
6,Other Race,13,25,52.0
4,Hispanic,170,331,51.359517
0,American Indian/Alaska Native,2,4,50.0
3,Decline to Specify,2,4,50.0
5,Native Hawaiian or Other Pacific Islande,0,1,0.0


In [48]:
all_df.groupby(['race_display'])['has_comorbidity'].apply(lambda x : x.astype(int).sum())

race_display
American Indian/Alaska Native                 3
Asian                                         5
Black or African American                    46
Decline to Specify                            3
Hispanic                                    253
Native Hawaiian or Other Pacific Islande      0
Other Race                                   21
Unknown                                      13
White                                       169
Name: has_comorbidity, dtype: int64

In [49]:
all_como = all_df.groupby(['race_display'])['COPD_com_flag', 'asthma_com_flag', 'diabetes_com_flag',
       'hypertension_com_flag', 'CAD_com_flag', 'heartfailure_com_flag',
       'CKD_com_flag', 'cancer_com_flag'].apply(lambda x : x.astype(int).sum()).reset_index()
pd.merge(all_como, race_df)

  """Entry point for launching an IPython kernel.


Unnamed: 0,race_display,COPD_com_flag,asthma_com_flag,diabetes_com_flag,hypertension_com_flag,CAD_com_flag,heartfailure_com_flag,CKD_com_flag,cancer_com_flag,number_of_patients
0,American Indian/Alaska Native,0,0,1,1,0,0,1,2,4
1,Asian,1,0,4,1,0,1,3,0,5
2,Black or African American,6,5,31,20,11,15,22,3,51
3,Decline to Specify,0,0,2,3,0,0,0,0,4
4,Hispanic,11,19,180,154,29,42,55,24,331
5,Native Hawaiian or Other Pacific Islande,0,0,0,0,0,0,0,0,1
6,Other Race,0,2,12,20,1,2,0,0,25
7,Unknown,0,1,9,7,3,2,6,1,15
8,White,26,20,93,96,37,33,36,17,216


In [50]:
out_df = all_df.value_counts(subset=['race_display', 'total_comorbidities'], sort=True).reset_index().rename(columns={0: 'count'})
#new_out_df = pd.merge(out_df, zip_df[['zip_code','median_income', 'mean_income', 'poverty_rate']], left_on='zip_cust_table', right_on='zip_code', how='inner')

#new_out_df = new_out_df[['zip_cust_table', 'total_comorbidities', 'count', 'median_income',
#       'mean_income', 'poverty_rate']].astype(float)

#new_out_df['below_state_median_income'] = new_out_df.median_income.apply(lambda x: 'yes' if x < 64034 else 'no')

out_df.sort_values(by=['total_comorbidities'], ascending=False).to_csv('tot_com_race.csv')
#new_out_df.sort_values(by=['total_comorbidities', 'count'], ascending=[False, False]).to_csv('tot_com_zip_dist_high.csv')
#new_out_df.sort_values(by=['median_income', 'total_comorbidities','count'], ascending=[True, False, False]).to_csv('tot_com_zip_dist_income.csv')



In [51]:
out_df = all_df.value_counts(subset=['zip_cust_table', 'qSOFA_score'], sort=True).reset_index().rename(columns={0: 'count'})

In [52]:
new_out_df = pd.merge(out_df, zip_df[['zip_code','median_income', 'mean_income', 'poverty_rate']], left_on='zip_cust_table', right_on='zip_code', how='inner')

In [53]:
new_out_df.columns

Index(['zip_cust_table', 'qSOFA_score', 'count', 'zip_code', 'median_income',
       'mean_income', 'poverty_rate'],
      dtype='object')

In [54]:
new_out_df = new_out_df[['zip_cust_table', 'qSOFA_score', 'count', 'median_income',
       'mean_income', 'poverty_rate']].astype(float)

In [55]:
new_out_df['below_state_median_income'] = new_out_df.median_income.apply(lambda x: 'yes' if x < 64034 else 'no')

In [56]:
out_df2 = all_df.value_counts(subset=['race_display', 'qSOFA_score'], sort=True).reset_index().rename(columns={0: 'count'})

In [57]:
out_df2.sort_values(by=['qSOFA_score'], ascending=False)

Unnamed: 0,race_display,qSOFA_score,count
12,Hispanic,3.0,8
13,White,3.0,6
4,Hispanic,2.0,56
6,White,2.0,28
9,Black or African American,2.0,11
15,Unknown,2.0,4
0,Hispanic,1.0,175
17,American Indian/Alaska Native,1.0,3
20,Native Hawaiian or Other Pacific Islande,1.0,1
19,Asian,1.0,2


In [58]:
new_out_df.sort_values(by=['qSOFA_score']).to_csv('qSOFA_zip_dist.csv')

In [59]:
new_out_df.sort_values(by=['qSOFA_score', 'count'], ascending=[False, False]).to_csv('qSOFA_zip_dist_high.csv')

In [60]:
new_out_df.sort_values(by=['median_income', 'qSOFA_score','count'], ascending=[True, False, False]).to_csv('qSOFA_zip_dist_income.csv')

In [61]:
all_df[['zip_cust_table', 'qSOFA_score']].set_index(['zip_cust_table']).count(level="zip_cust_table")

TypeError: Can only count levels on hierarchical index.

In [None]:
all_df[['zip_cust_table', 'qSOFA_score']].groupby(['zip_cust_table', 'qSOFA_score']).agg(['count'])

In [None]:
#fig = df.hist(figsize=(50, 30))
import matplotlib
import matplotlib.pyplot as plt

params = {'axes.titlesize':'24',
          'xtick.labelsize':'24',
          'ytick.labelsize':'24'}
matplotlib.rcParams.update(params)


fig = all_df.qSOFA_score.hist(by=all_df.zip_cust_table, figsize=(50, 30))
[x.title.set_size(24) for x in fig.ravel()]

plt.savefig('example.pdf')  

In [None]:
Counter(all_df['zip_cust_table'])

In [None]:
df.head()

In [None]:
icu_df = df[~df['icu_hours_summed'].isnull()]

In [None]:
Counter(icu_df['qSOFA_score'])

In [None]:
Counter(icu_df['pO2_Art'].values)

In [None]:
sorted(icu_df.columns)

In [None]:
working_df = icu_df[~icu_df['qSOFA_score'].isnull()] 
working_df

In [None]:
data = icu_df[['age_at_admit', 'pO2_Art', 
               'qSOFA_score','race_display',
              'vent_hours_summed', 'zip_cust_table', 'heartfailure_com_flag',
              'cancer_com_flag','gender','WBC','Mean_Arterial_Pressure',
              'Bili_Total','CAD_com_flag','CKD_com_flag','COPD_com_flag',
 'Creatinine', 'FiO2/Percent','Glasgow_Coma_Score','diabetes_com_flag',
 'hypertension_com_flag','length_of_stay','discharge_disposition_display','Platelet', 'deid_empi_encounter']]

In [None]:
data.head()

In [None]:
# only 236 patients with all tests
allo_df = data[['pO2_Art', 'Creatinine', 'FiO2/Percent', 
      'Glasgow_Coma_Score', 'Platelet', 'Mean_Arterial_Pressure',
     'Bili_Total', 'deid_empi_encounter']].dropna()

In [None]:
list_of_patients = list(allo_df['deid_empi_encounter'])

In [None]:
adjusted_patients = data[data['deid_empi_encounter'].isin(list_of_patients)]

In [None]:
def calculate_sofa(row):
    count = 0
    
    # need to implement Fi02/po2
    
    if row.Platelet >= 100 and row.Platelet <= 149:
        count += 1 
        
    elif row.Platelet >= 50 and row.Platelet <= 99:
        count += 2
        
    elif row.Platelet >= 20 and row.Platelet <= 49:
        count += 3
        
    elif row.Platelet < 20:
        count += 4
       
    # Glasgow
    if row.Glasgow_Coma_Score == 13 or row.Glasgow_Coma_Score == 14:
        count += 1 
        
    elif row.Glasgow_Coma_Score >= 10 and row.Glasgow_Coma_Score <= 12:
        count += 2
        
    elif row.Glasgow_Coma_Score >= 6 and row.Glasgow_Coma_Score <= 9:
        count += 3
        
    elif row.Glasgow_Coma_Score < 6:
        count += 4
        
    # Bilirubin 
    
    if float(row.Bili_Total) >= 1.2 and float(row.Bili_Total) <= 1.9:
        count += 1 
        
    elif float(row.Bili_Total) >= 2.0 and float(row.Bili_Total) <= 5.9:
        count += 2
        
    elif float(row.Bili_Total) >= 6.0 and float(row.Bili_Total) <= 11.9:
        count += 3
        
    elif float(row.Bili_Total) >= 12.0:
        count += 4 
        
    # Need to implement Mean artieral pressure later 
    
    # Creatinine 
    
    if row.Creatinine >= 1.2 and row.Creatinine <= 1.9:
        count += 1 
        
    elif row.Creatinine >= 2.0 and row.Creatinine <= 3.4:
        count += 2
        
    elif row.Creatinine >= 3.5 and row.Creatinine <= 4.9:
        count += 3
        
    elif row.Creatinine >= 5.0:
        count += 4 
        
    return count 

In [None]:
allo_df['sofa'] = allo_df.apply(lambda row: calculate_sofa(row), axis = 1)

In [None]:
adjusted_patients['sofa'] = allo_df.apply(lambda row: calculate_sofa(row), axis = 1)

In [None]:
allo_df['sofa'].describe()

In [None]:
adjusted_patients['sofa'].describe()

In [None]:
#https://www.mdcalc.com/sequential-organ-failure-assessment-sofa-score#evidence
sofa_mortality_calibration = {
    0: 0,
    1: 0 ,
    2: 6.4,
    3: 6.4,
    4: 20.2,
    5: 20.2,
    6: 21.5,
    7: 21.5,
    8: 33.3,
    9: 33.3 ,
    10: 50.0,
    11: 50.0 ,
    12: 95.2,
    13: 95.2 ,
    14: 95.2  , 
    
}

In [None]:
qsofa_mortality_calibration = {
    0: 0.6,
    1: 5 ,
    2: 10,
    3: 24,
    
}

In [None]:
working_df.dtypes

In [None]:
def comorbidity_count(row):
    count = 0 
    
    if row.COPD_com_flag == 'TRUE':
        count += 1 
        
    if row.asthma_com_flag == 'TRUE':
        count += 1 
        
    if row.diabetes_com_flag == 'TRUE':
        count += 1 
        
    if row.hypertension_com_flag == 'TRUE':
        count += 1 
        
    if row.CAD_com_flag == 'TRUE':
        count += 1 
        
    if row.heartfailure_com_flag == 'TRUE': 
        count += 1 
        
    if row.CKD_com_flag == 'TRUE':
        count += 1 
        
    if row.cancer_com_flag == 'TRUE':
        count += 1 
        
    return count

In [None]:
working_df['comorbidity_count'] = working_df.apply(lambda row: comorbidity_count(row), axis=1)

In [None]:
working_df['cancer_com_flag'].dtype

In [None]:
working_df['life_years'] = working_df.age_at_admit.apply(lambda x: 100 - x)

In [None]:
Counter(adjusted_patients['discharge_disposition_display'])

In [None]:
np.nan

In [None]:
class Allocation(object):
    # Code will be adjusted for SOFA. Currently using qSOFA 
    # Only looking at State Level CSC for vent allocation 
    def __init__(self, patients, scarcity, sofa_calibration):
        self.patients = patients.copy() 
        self.patients['death'] = [0 for _ in range(len(self.patients))]
        self.patients['allocated_vent'] = ["no" for _ in range(len(self.patients))]
        
        self.num_vents = int(len(patients) * scarcity) 
        
        self.mortality_model = sofa_calibration
        
    def allocate(self, row):
        prob = self.mortality_model[row.qSOFA_score]
    
        
        death = np.random.binomial(size=1, n=1, p=prob*.01)[0]
        #print(death)
        
        if death == 1 or row.discharge_disposition_display == 'Expired':
            return death, 'yes'
        
        else:
            #print('yup yup')
            return death, 'yes'
    
    def check_expiration(self, df):

        temp_df = df.copy()
        for i, row in df.iterrows():
            row = row.copy()
            if (pd.isna(row.vent_hours_summed)) or row.discharge_disposition_display == 'Expired':
        
                temp_df.loc[i, 'death'] = 1

            else:
                
                temp_df.loc[i, 'death'] = 0
                
                
        return temp_df
    
    def __run_allocation(self, df2):
        
        for i, row in df2.iterrows():
            row = row.copy()
            if self.num_vents == 0:
                #print('out')
                break
            
            mortality, allocate_cond = self.allocate(row)
            
            df2.loc[i, 'death'] = mortality
                
            df2.loc[i, 'allocated_vent'] = allocate_cond
            
            self.num_vents -= 1 
        
        non_allocated = df2[df2['allocated_vent']=='no']
        allocated = df2[df2['allocated_vent']=='yes']
        
        #print(len(allocated))
        #print(Counter(allocated['death']))
        adj_df = self.check_expiration(non_allocated)
        #print(len(adj_df))
        #print(Counter(adj_df['death']))
        #adj_df = self.check_expiration(non_allocated)
        
        return pd.concat([allocated, adj_df])
        
                    
    def lottery(self):
        temp_patients = self.patients.copy()
        
        temp_patients.sample(frac=1)
        
        out_df = self.__run_allocation(temp_patients)
        return out_df 
    
    def youngest(self):
        temp_patients = self.patients.copy()
        
        temp_patients.sort_values(by=['age_at_admit'], ascending=True, inplace=True)
        
        out_df = self.__run_allocation(temp_patients)
        return out_df 
            
        
    def maryland(self):
        temp_patients = self.patients.copy()
        
        temp_patients.sort_values(by=['qSOFA_score', 'comorbidity_count'], 
                                      ascending=[False, False], inplace=True)
        
        out_df = self.__run_allocation(temp_patients)
        return out_df 
            
    def new_york(self):
        temp_patients = self.patients.copy()
        grouped = temp_patients.groupby('qSOFA_score').sum().reset_index()
        grouped.sort_values('qSOFA_score', ascending=False)
        
        out_df = self.__run_allocation(grouped)
        return out_df 
            
    
    def max_lives_saved(self):
        temp_patients = self.patients.copy()
        
        temp_patients.sort_values(by=['qSOFA_score'], ascending=False, inplace=True)
        
        out_df = self.__run_allocation(temp_patients)
        return out_df 
    
    def max_life_years(self):
        temp_patients = self.patients.copy()

        temp_patients.sort_values(by=['qSOFA_score', 'life_years'], ascending=[False,False], inplace=True)

        out_df = self.__run_allocation(temp_patients)
        return out_df 
        
    def sickest_first(self):
        temp_patients = self.patients.copy()
        
        temp_patients.sort_values(by=['qSOFA_score'], ascending=False, inplace=True)
        
        out_df = self.__run_allocation(temp_patients)
        return out_df 
        

In [None]:
working_df

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_lot = allocate.lottery()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_life_years = allocate.max_life_years()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_youngest = allocate.youngest()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_mar = allocate.maryland()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_ny = allocate.new_york()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_lives = allocate.max_lives_saved()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
iters = 10 
sums = 0
for _ in range(iters):
    allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
    testing_df_sickest = allocate.sickest_first()
    sums += testing_df['death'].sum()
    
avg_deaths = sums/iters
avg_deaths

In [None]:
deaths_df_sick = testing_df_sickest[testing_df_sickest['death'] == 1]

In [None]:
Counter(deaths_df_sick['ethnicity_display'])

In [None]:
working_df

In [None]:
Counter(working_df['qSOFA_score'])

In [None]:
Counter(deaths_df_sick['zip_cust_table'])

In [None]:
deaths_df= testing_df_sickest[testing_df_sickest['death'] == 1]

In [None]:
Counter(deaths_df['ethnicity_display'])

In [None]:
Counter(deaths_df['zip_cust_table'])

In [None]:
deaths_df['age_at_admit'].hist()

In [None]:
testing_df

In [None]:
allocate = Allocation(working_df, .50, qsofa_mortality_calibration)
testing_df = allocate.max_life_years()

In [None]:
Counter(testing_df['comorbidity_count'])

In [None]:
testing_df.head()

In [None]:
check = testing_df[testing_df['allocated_vent'] == 'no']

In [None]:
testing_df[testing_df['death'] == 1]

In [None]:
testing_df[testing_df['death'] == 1][['vent_hours_summed','discharge_disposition_display', 'death']]

In [None]:
testing_df['death'].unique()

In [None]:
temp = check[['discharge_disposition_display', 'qSOFA_score', 'death']]
temp

In [None]:
temp[temp['death'] == 1]

In [None]:
Counter(temp['discharge_disposition_display'])

In [None]:
Counter(testing_df['allocated_vent'])

In [None]:
Counter(testing_df['death'])