## Preliminaries and Dataframe Construction

In [1]:
# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
import random

#Import Encounters from Database Query
# NOTE(review): unpickling can execute arbitrary code; presumably "encounters.pkl" is a
# trusted local export of the database query - confirm before loading a file from elsewhere.
df = pd.read_pickle("encounters.pkl")

#Reproducibility
# The Monte Carlo cells draw from NumPy's global random state (DataFrame.sample without
# random_state, np.random.randint). Seeding once here makes a Restart & Run All reproducible.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

#Formatting
plt.rcParams['font.family'] = 'Times New Roman'  # Render figure text in Times New Roman
plt.rcParams['axes.grid'] = True  # Draw grid lines on every axes
sns.set_palette(sns.color_palette('Set2')) #set color palette to a nice seaborn style https://seaborn.pydata.org/tutorial/color_palettes.html

# DataFrame.info() prints its summary itself and returns None, so wrapping it in print()
# only appended a stray "None" to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3707 entries, 0 to 3706
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   EncounterID       3707 non-null   object  
 1   SubjectID         3707 non-null   object  
 2   Age               3707 non-null   float64 
 3   Race              3707 non-null   object  
 4   Ethnicity         3707 non-null   object  
 5   Sex               3707 non-null   object  
 6   InitialSOFA       3707 non-null   int64   
 7   StayLength        3707 non-null   float64 
 8   CCS_raw           3707 non-null   int64   
 9   CCS_age           3707 non-null   int64   
 10  CCS_Colorado      3707 non-null   int64   
 11  ECI_raw           3707 non-null   int64   
 12  LE                3707 non-null   float64 
 13  Cho_LE            3707 non-null   float64 
 14  COVID_Status      3707 non-null   int8    
 15  Elective_Flag     3707 non-null   int64   
 16  Discharge_Status  3707 n

In [2]:

# Columns carried into the Monte Carlo cells, in this order.
_COHORT_COLUMNS = ['EncounterID', 'Age', 'Race', 'Age_Group', 'COVID_Status', 'InitialSOFA',
                   'NY_Score', 'Maryland_Score', 'Colorado_Score', 'PA_Score', 'Actual_Survival']

# Ordinal code for each Age_Group label (1 = youngest group).
_AGE_GROUP_CODES = {'<25': 1, '25-34': 2, '35-44': 3, '45-54': 4,
                    '55-64': 5, '65-74': 6, '75-84': 7, '>85': 8}

# Right-closed age bins for Maryland_Age: (0, 50], (50, 70], (70, 85], (85, inf).
_MARYLAND_AGE_BINS = [0, 50, 70, 85, np.inf]


def _build_cohort(encounters, covid_status):
    """Return the encounters with the given COVID_Status, shaped for the simulations.

    Parameters
    ----------
    encounters : pd.DataFrame
        Encounter table; must contain 'COVID_Status', 'Survived' and the columns
        listed in ``_COHORT_COLUMNS`` (with 'Survived' renamed to 'Actual_Survival').
    covid_status : int
        Value of 'COVID_Status' to keep (1 = positive, 0 = negative).

    Returns
    -------
    pd.DataFrame
        Matching rows without any missing value, restricted to ``_COHORT_COLUMNS``
        plus 'Age_Group_N' (integer age-group code) and 'Maryland_Age'
        (categorical bin label 1-4).
    """
    return (encounters
        # Boolean selection instead of .where(): .where() first blanks the non-matching
        # rows to NaN, which upcasts every integer column to float before .dropna()
        # removes those rows again. .loc keeps the original dtypes.
        .loc[encounters["COVID_Status"] == covid_status]
        .dropna()  # still discard encounters with a missing value in any column
        .rename(columns={'Survived': 'Actual_Survival'})
        .reindex(columns=_COHORT_COLUMNS)
        .assign(
            # .astype(int) raises if an Age_Group label is missing from the mapping
            Age_Group_N=lambda cohort: cohort['Age_Group'].map(_AGE_GROUP_CODES).astype(int),
            Maryland_Age=lambda cohort: pd.cut(cohort['Age'], _MARYLAND_AGE_BINS, labels=[1, 2, 3, 4]),
        )
    )


df_PosCOVID = _build_cohort(df, 1)
#returns 1276 encounters where patient was COVID positive

df_NegCOVID = _build_cohort(df, 0)
#returns 2431 encounters where patient was COVID negative

In [4]:
##Colorado

#MC for different COVID+/COVID- patient mixes with the Colorado protocol (Protocol = 5)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
colorado_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 5,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #random draw per patient; not used by this cell's ranking
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest Colorado_Score in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['Colorado_Score'].rank(method="first") #COLORADO (5)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        colorado_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_Colorado_Mix = pd.concat(colorado_samples) if colorado_samples else pd.DataFrame()
MC_Colorado_Mix.to_csv('MC_Colorado_Mix.csv', index=False)

In [3]:
##Maryland

#MC for different COVID+/COVID- patient mixes with the Maryland protocol (Protocol = 4)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
maryland_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 4,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #random draw per patient; not used by this cell's ranking
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest Maryland_Score in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['Maryland_Score'].rank(method="first") #Maryland (4)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        maryland_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_Maryland_Mix = pd.concat(maryland_samples) if maryland_samples else pd.DataFrame()
MC_Maryland_Mix.to_csv('MC_Maryland_Mix.csv', index=False)

In [10]:
## Age Group

#MC for different COVID+/COVID- patient mixes with the age-group protocol (Protocol = 3)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
age_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 3,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #random draw per patient; not used by this cell's ranking
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest Age_Group_N in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['Age_Group_N'].rank(method="first") # AGE-GROUP (3)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        age_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_Age_Mix = pd.concat(age_samples) if age_samples else pd.DataFrame()
MC_Age_Mix.to_csv('MC_Age_Mix.csv', index=False)

In [11]:
#New York

#MC for different COVID+/COVID- patient mixes with the New York SOFA protocol (Protocol = 2)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
ny_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 2,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #random draw per patient; not used by this cell's ranking
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest NY_Score in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['NY_Score'].rank(method="first") #NY SOFA (2)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        ny_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_NY_Mix = pd.concat(ny_samples) if ny_samples else pd.DataFrame()
MC_NY_Mix.to_csv('MC_NY_Mix.csv', index=False)

In [12]:
##Lottery

#MC for different COVID+/COVID- patient mixes with the lottery protocol (Protocol = 1)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
lottery_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 1,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #lottery draw per patient, ranked below
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest Random draw in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['Random'].rank(method="first") # LOTTERY (1)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        lottery_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_Lottery_Mix = pd.concat(lottery_samples) if lottery_samples else pd.DataFrame()
MC_Lottery_Mix.to_csv('MC_Lottery_Mix.csv', index=False)

In [13]:
##Pure SOFA

#MC for different COVID+/COVID- patient mixes with the pure SOFA protocol (Protocol = 6)
patients = 20 #patients per bracket; also the denominator of the COVID+ mix fraction (m/patients)
beds = patients/2 #beds available per bracket of `patients` patients
sample_size = 300 #size of each sample (i.e. number of patients drawn from dataset) per MC simulation run
runs = 250 #number of MC simulations per mix level

#Collect one frame per (mix, run) and concatenate once after the loops: calling pd.concat
#inside the loop re-copies every previously accumulated row on each iteration.
sofa_samples = []

for m in range(1, 21): #iterate over COVID+ mix levels m/patients (5%, 10%, ..., 100% when patients = 20)
    mix = sample_size*m//patients #number of COVID+ patients per sample; integer arithmetic avoids relying on float truncation
    for i in range(runs): #iterate over each run
        sample = (
            pd.concat([df_PosCOVID.sample(n=mix, replace=False),
                       df_NegCOVID.sample(n=sample_size-mix, replace=False)])
            .sample(n=sample_size, replace=False) #randomly shuffle dataframe w/o replacement
            .assign(Protocol = 6,
                    Run = i+1,
                    COVID_Mix = mix/sample_size,
                    Allocated = 0,
                    Survived = 0,
                    Bracket = lambda df_0: #consecutive groups of `patients` rows
                        np.arange(len(df_0)) // patients + 1,
                    Random = np.random.randint(0, 5000, size=sample_size), #random draw per patient; not used by this cell's ranking
                    )
            .assign(Decision_Pair = lambda df_1: #assign a number to each pair in a bracket.
                        df_1.groupby(['Bracket'])['Allocated'].cumcount() // 2 + 1
                    )
            .assign(Rank = lambda df_2: #rank 1 = lowest InitialSOFA in the pair; ties keep row order
                        df_2.groupby(['Bracket', 'Decision_Pair'])['InitialSOFA'].rank(method="first") #Pure SOFA (6)
                    )
            .assign(Rank = lambda df_3: df_3['Rank'].mask(df_3['Decision_Pair'] <= (beds - patients//2), 1)
                        if beds > (patients // 2) #more beds than pairs: both members of the first (beds - patients//2) pairs get first priority
                        else df_3['Rank'].mask(df_3['Decision_Pair'] > beds, 2) #otherwise: both members of pairs numbered above `beds` get last priority
                    )
            .assign(Allocated = lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= 1, 1)) #rank 1 receives the bed
            .assign(Survived = lambda df_3: df_3['Survived'].mask(df_3['Allocated'] == 1, df_3['Actual_Survival'])) #outcome counted only for allocated patients
            #NOTE: per-encounter rows are kept, which makes the output file large (i.e. 4GB per file).
            #The alternative is to identify the fields needed for analysis and aggregate here, e.g.
            #.groupby(['Run', 'Race', 'Age_Group'], as_index=False).sum(numeric_only=True)
            .reindex(columns=['Protocol', 'COVID_Mix', 'Run', 'EncounterID', 'Allocated', 'Survived']) #keeps only these columns
        )
        sofa_samples.append(sample)

#pd.concat raises on an empty list, so fall back to an empty frame when no run was executed
MC_SOFA_Mix = pd.concat(sofa_samples) if sofa_samples else pd.DataFrame()
MC_SOFA_Mix.to_csv('MC_SOFA_Mix.csv', index=False)