# Preliminaries and Dataframe Construction

In [2]:
# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
import random

# Load the encounter-level cohort produced by the database query.
# NOTE(security): pickle files can execute arbitrary code when loaded -- only
# read "encounters.pkl" from a trusted source.
df = pd.read_pickle("encounters.pkl")

# Seed the global RNGs so that Restart Kernel -> Run All reproduces the same
# Monte Carlo draws. DataFrame.sample() without random_state and
# np.random.randint() both draw from NumPy's global random state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Formatting
plt.rcParams['font.family'] = 'Times New Roman'  # render figure text in Times New Roman
plt.rcParams['axes.grid'] = True  # draw grid lines on every axes
sns.set_palette(sns.color_palette('Set2'))  # Set2 palette, see https://seaborn.pydata.org/tutorial/color_palettes.html

### Monte Carlo Model for 50% Capacity TEST
https://towardsdatascience.com/monte-carlo-simulation-and-variants-with-python-43e3e7c59e1f \
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html \
https://replit.com/@JuniLearning/Monte-Carlo-Lab \
https://stackoverflow.com/questions/37009287/using-pandas-append-within-for-loop/37009561#37009561


In [6]:
# Smoke test of the Monte Carlo allocation loop using the Bhavani score at
# 50% capacity. Only a handful of runs are simulated so the pipeline can be
# inspected quickly before the full 1000-run simulations.
df_small_TEST = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'Age', 'Bhavani_Score', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 10  # number of Monte Carlo runs for this test

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_TEST
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol='Bhavani', Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        # Consecutive blocks of `patients` rows form one allocation decision.
        .assign(Decision=lambda df_1: np.arange(len(df_1)) // patients + 1)
        # Lowest Bhavani_Score ranks first within a decision. Sorting by Age
        # beforehand makes rank(method="first") break score ties in favour of
        # the younger patient; assign() re-aligns the ranks on the index
        # (assumes df's index is unique -- TODO confirm).
        .assign(
            Rank=lambda df_2: (
                df_2
                .sort_values('Age')
                .groupby(['Decision'])['Bhavani_Score']
                .rank(method="first")
            )
        )
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_TEST_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

# Test output is intentionally not persisted; uncomment to inspect it on disk.
#results_TEST_50.to_csv('MC_TEST_50.csv', index=False)

### Monte Carlo Sim (50% Scarcity - Replicate Bhavani for NY SOFA, Age-Groups, Lottery, Bhavani Multi-Principle, Colorado (modified))

In [23]:
# NY protocol, 50% scarcity: in every simulated decision `patients` encounters
# compete for `beds` beds and the lowest NY_Score is ranked first.
df_small_ny = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'NY_Score', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_ny
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol=2, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        # Consecutive blocks of `patients` rows form one allocation decision.
        .assign(Decision=lambda df_1: np.arange(len(df_1)) // patients + 1)
        # Lowest score ranks first; ties are broken by the (random) row order.
        .assign(Rank=lambda df_2: df_2.groupby(['Decision'])['NY_Score'].rank(method="first"))
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_NY_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_NY_50.to_csv('MC_NY_50.csv', index=False)


### Monte Carlo (Age-Banding)
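Age bands, ordered from highest to lowest allocation priority, are '<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'. Within each decision the patient in the youngest band receives the bed; patients in the same band are separated by their random order in the sample.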

In [24]:
# Age-band protocol, 50% scarcity: in every simulated decision `patients`
# encounters compete for `beds` beds and the youngest age band is ranked first.
df_small_age = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'Age_Group', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Ordinal code for each age band (1 = youngest). Built once, outside the loop.
# A band missing from this mapping becomes NaN and makes .astype(int) raise.
age_band_order = {'<25': 1, '25-34': 2, '35-44': 3, '45-54': 4, '55-64': 5, '65-74': 6, '75-84': 7, '>85': 8}

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_age
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol=3, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        .assign(
            # Consecutive blocks of `patients` rows form one allocation decision.
            Decision=lambda df_1: np.arange(len(df_1)) // patients + 1,
            Age_Group_N=lambda df_1: df_1['Age_Group'].map(age_band_order).astype(int),
        )
        # Youngest band ranks first; ties are broken by the (random) row order.
        .assign(Rank=lambda df_2: df_2.groupby(['Decision'])['Age_Group_N'].rank(method="first"))
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
        # The numeric band code is only a ranking helper; keep it out of the output.
        .drop(['Age_Group_N'], axis=1)
    )
    run_frames.append(sample)

results_Age_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_Age_50.to_csv('MC_Age_50.csv', index=False)

### Monte Carlo (Lottery)

In [25]:
# Lottery protocol, 50% scarcity: in every simulated decision `patients`
# encounters compete for `beds` beds and the lowest random draw is ranked first.
df_small_lott = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_lott
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol=1, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        .assign(
            # One lottery number per sampled row. The array length follows
            # `decisions` (previously hard-coded to 3700) so that changing the
            # sample size no longer raises a length-mismatch error.
            Random=np.random.randint(0, 5000, size=decisions),
            # Consecutive blocks of `patients` rows form one allocation decision.
            Decision=lambda df_1: np.arange(len(df_1)) // patients + 1,
        )
        # Lowest draw ranks first; equal draws are broken by the (random) row order.
        .assign(Rank=lambda df_2: df_2.groupby(['Decision'])['Random'].rank(method="first"))
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_Lott_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_Lott_50.to_csv('MC_Lott_50.csv', index=False)

In [26]:
# Bhavani protocol, 50% scarcity: in every simulated decision `patients`
# encounters compete for `beds` beds and the lowest Bhavani_Score is ranked
# first, with age used as the tie-breaker.
df_small_bhavani = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'Bhavani_Score', 'Age', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_bhavani
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol=4, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        # Consecutive blocks of `patients` rows form one allocation decision.
        .assign(Decision=lambda df_1: np.arange(len(df_1)) // patients + 1)
        # Lowest Bhavani_Score ranks first within a decision. Sorting by Age
        # beforehand makes rank(method="first") break score ties in favour of
        # the younger patient; assign() re-aligns the ranks on the index
        # (assumes df's index is unique -- TODO confirm).
        .assign(
            Rank=lambda df_2: (
                df_2
                .sort_values('Age')
                .groupby(['Decision'])['Bhavani_Score']
                .rank(method="first")
            )
        )
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_Bhavani_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_Bhavani_50.to_csv('MC_Bhavani_50.csv', index=False)

In [27]:
# Colorado protocol, 50% scarcity: in every simulated decision `patients`
# encounters compete for `beds` beds and the lowest Colorado_Score is ranked first.
df_small_colorado = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'Colorado_Score', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_colorado
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        .assign(Protocol=5, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        # Consecutive blocks of `patients` rows form one allocation decision.
        .assign(Decision=lambda df_1: np.arange(len(df_1)) // patients + 1)
        # Lowest score ranks first; ties are broken by the (random) row order.
        .assign(Rank=lambda df_2: df_2.groupby(['Decision'])['Colorado_Score'].rank(method="first"))
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_Colorado_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_Colorado_50.to_csv('MC_Colorado_50.csv', index=False)

In [3]:
# Pure SOFA protocol, 50% scarcity: in every simulated decision `patients`
# encounters compete for `beds` beds and the lowest InitialSOFA is ranked first.
df_small_sofa = (
    df
    .rename(columns={'Survived': 'Actual_Survival'})
    .reindex(columns=['EncounterID', 'InitialSOFA', 'Actual_Survival'])
)

beds = 1  # beds available in each allocation decision
patients = 2  # patients competing in each decision (capacity = beds / patients)
decisions = 3700  # encounters drawn per run; split into consecutive groups of `patients`
runs = 1000  # number of Monte Carlo runs at this capacity level

# Collect each run and concatenate once at the end: calling pd.concat inside
# the loop re-copies every earlier run on each iteration (quadratic cost).
run_frames = []

for i in range(runs):
    sample = (
        df_small_sofa
        # Draw encounters in random order, without replacement.
        .sample(n=decisions, replace=False)
        # Protocol code 6: this cell previously reused 5, which is the Colorado
        # protocol's code, making the two indistinguishable once the result
        # files are combined. NOTE(review): add 6 to any downstream label map.
        .assign(Protocol=6, Run=i + 1, Capacity=(beds / patients), Allocated=0, Survived=0)
        # Consecutive blocks of `patients` rows form one allocation decision.
        .assign(Decision=lambda df_1: np.arange(len(df_1)) // patients + 1)
        # Lowest score ranks first; ties are broken by the (random) row order.
        .assign(Rank=lambda df_2: df_2.groupby(['Decision'])['InitialSOFA'].rank(method="first"))
        # Patients ranked within the bed count receive a bed and take their
        # recorded outcome; everyone else keeps Allocated = 0 and Survived = 0.
        .assign(
            Allocated=lambda df_3: df_3['Allocated'].mask(df_3['Rank'] <= beds, 1),
            Survived=lambda df_3: df_3['Survived'].mask(df_3['Rank'] <= beds, df_3['Actual_Survival']),
        )
    )
    run_frames.append(sample)

results_sofa_50 = pd.concat(run_frames) if run_frames else pd.DataFrame()

results_sofa_50.to_csv('MC_sofa_50.csv', index=False)