# Preliminaries and Dataframe Construction

In [1]:
# Import modules
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

#Formatting
plt.rcParams['font.family'] = 'Times New Roman'  # Set plt shows font to Times New Roman
plt.rcParams['axes.grid'] = True  # Ensure line graphs display on graphs
sns.set_palette(sns.color_palette('Accent')) #set color palette to a nice seaborn style https://seaborn.pydata.org/tutorial/color_palettes.html

In [2]:
#Import Encounters from Database Query
df_baseline = pd.read_pickle("encounters.pkl").assign(Run = 1, Capacity = 1, Allocated = 1, Baseline_Surv = lambda df_: df_['Survived']).astype({'Age_Group': 'str'})
print(df_baseline.info())

df_demographics = (df_baseline
    .assign(Baseline_Surv = lambda df_baseline: df_baseline['Survived']) #create column for whether individual survived at 100% capacity / with ventilator support
    .astype({'Age_Group': 'str'})
    .reindex(columns = [
    'EncounterID',
    'Race',
    'Sex',
    'Age_Group',
    'COVID_Status',
    'Baseline_Surv',
    'LE',
    'Cho_LE']
    )
)

df_Lott_All = pd.read_csv('MC_Lott_All.csv', converters={'EncounterID':str, 'Age_Group':str}).assign(Protocol = 'Lottery').merge(df_demographics, on=['EncounterID'])
df_NY_All = pd.read_csv('MC_NY_All.csv', converters={'EncounterID':str, 'Age_Group':str}).assign(Protocol = 'NY SOFA').merge(df_demographics, on=['EncounterID'])
df_Age_All = pd.read_csv('MC_Age_All.csv', converters={'EncounterID':str}).assign(Protocol = 'Age').merge(df_demographics, on=['EncounterID'])
df_Bhavani_All = pd.read_csv('MC_Bhavani_All.csv', converters={'EncounterID':str, 'Age_Group':str}).assign(Protocol = 'Bhavani').merge(df_demographics, on=['EncounterID'])
df_Colorado_All = pd.read_csv('MC_Colorado_All.csv', converters={'EncounterID':str, 'Age_Group':str}).assign(Protocol = 'Colorado').merge(df_demographics, on=['EncounterID'])
df_sofa_All = pd.read_csv('MC_sofa_All.csv', converters={'EncounterID':str, 'Age_Group':str}).assign(Protocol = 'Pure SOFA').merge(df_demographics, on=['EncounterID'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3707 entries, 0 to 3706
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   EncounterID       3707 non-null   object 
 1   SubjectID         3707 non-null   object 
 2   Age               3707 non-null   float64
 3   Race              3707 non-null   object 
 4   Ethnicity         3707 non-null   object 
 5   Sex               3707 non-null   object 
 6   InitialSOFA       3707 non-null   int64  
 7   StayLength        3707 non-null   float64
 8   LE                3707 non-null   float64
 9   Cho_LE            3707 non-null   float64
 10  COVID_Status      3707 non-null   int8   
 11  Discharge_Status  3707 non-null   int64  
 12  Intubation        3707 non-null   int64  
 13  NY_Score          3707 non-null   int8   
 14  Bhavani_Score     3707 non-null   int64  
 15  Colorado_Score    3707 non-null   int64  
 16  Protocol          3707 non-null   object 


In [3]:
from scipy.stats.distributions import chi2

#DEFINE Raw Stats Calculator#
def get_raw_stats(df_, groups, alpha=0.05):
    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], #multiples each individual patient (i.e. 1 or 0) by capacity (e.g. 0.5) to get expected survival in agg.
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), #return baseline, then overwrite with 0 if allocated=1
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) #return allocated, then overwrite with 0 if baseline=1
            )          
        .groupby(groups, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        #Calculate Lives Saved and Allocation Rate
        .assign(Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv'],
                A_rate = lambda df_0: df_0['Allocated'] / df_0['Pop_N']
            )
        .assign(A_rate_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['Allocated'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N'],
                A_rate_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['Allocated']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N']
            )
        #Calculate Survival Rate (by first calculating death rate)
        .assign(Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'])
        .assign(D_rate = lambda df_1: df_1['Deaths'] / df_1['Pop_N'])
        .assign(D_rate_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['Deaths'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N'],
                D_rate_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['Deaths']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N']
            )
        .assign(S_rate = lambda df_3: 1-df_3['D_rate'],
                S_rate_CI_lo = lambda df_3: 1-df_3['D_rate_CI_hi'],
                S_rate_CI_hi = lambda df_3: 1-df_3['D_rate_CI_lo'])
        #Calculate FNR, FPR and Lives Saved per Patient
        .assign(FN_rate = lambda df_0: df_0['FN']/(df_0['Pop_N']-df_0['Allocated']),
                FP_rate = lambda df_0: df_0['FP']/df_0['Allocated'],
                LS_rate = lambda df_0: df_0['Lives_Saved']/df_0['Pop_N']
            )
        #Cleanup
        .drop(['Deaths', 'D_rate', 'D_rate_CI_hi', 'D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#DEFINE Age-Adjusted Calculator#
def get_age_adjusted_stats(df_, groups, alpha=0.05):
    
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})#.astype({'Age_Group': 'category'})

    if ('Age_Group' in groups): 
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], #multiples each individual patient (i.e. 1 or 0) by capacity (e.g. 0.5) to get expected survival in agg.
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), #return baseline, then overwrite with 0 if allocated=1
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) #return allocated, then overwrite with 0 if baseline=1
            )
        #calculate population totals for each age group (additionally sliced by other variables, e.g. protocol, run, race)
        .groupby(groups_age, as_index=True) #originally false
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")             
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0), #zero out Std_Pop for each sub-group age-band where sub-group has no subjects (i.e. no AIAN in <25)
                Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'],
                Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv']
            )
        #Calculate Age-Adj Deaths and Variance for each age-group
        .assign(Age_Adj_D_rate = lambda df_1: (df_1['Deaths']/df_1['Pop_N']) * df_1['Std_Pop'], #calculate the age-adjusted rate (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_D_var = lambda df_1: (df_1['Std_Pop']**2)*(df_1['Deaths']/(df_1['Pop_N']**2)), ## OLD VERSION, unclear why used.
                Age_Adj_D_var = lambda df_1: df_1['Deaths']*((df_1['Std_Pop']/df_1['Pop_N'])**2), ## SEER STAT version https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm
                #variance for each age_group to be summed for total variance of Race (see WA Health doc) https://doh.wa.gov/sites/default/files/legacy/Documents/1500//ConfIntGuide.pdf
         ## Calculate Age-Adjusted FNR and FPR for each age-group
                Age_Adj_FN_rate = lambda df_1: (df_1['FN']/(df_1['Pop_N']-df_1['Allocated'])) * df_1['Std_Pop'], #first converts LS into an LS-rate, then multiples by proportion of std pop in that group
                Age_Adj_FP_rate = lambda df_1: (df_1['FP']/df_1['Allocated']) * df_1['Std_Pop'],
        ## Calculate Age-Adjusted Lives Saved Rate for each age group
                Age_Adj_LS_rate = lambda df_1: (df_1['Lives_Saved']/df_1['Pop_N']) * df_1['Std_Pop'], #first converts LS into an LS-rate, then multiples by proportion of std pop in that group
            )
        #Assign the w variables to each Age_Group (and other groupings)
        .assign(w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N']) #calc pop weight for each Age_Group and Race (use max for Fay and Freur, and avg for Tiwari mod)
        .assign(w_max = lambda df_2: df_2.groupby(groups)['w_i'].transform('max')) #find max pop weight for Fay and Freur CIs (note use of transform, see here https://stackoverflow.com/questions/35640364/python-pandas-max-value-in-a-group-as-a-new-column
        #Collapse the age-groups to calculate total age-adjusted deaths/lives saved.
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .assign(w_max = lambda df_3: df_3['w_max']/len(df_.groupby('Age_Group').count())) #divide sum of max pop weights by number of age-groups - i.e. 8 (to re-idnetify the max pop weight for Race)
        ## Calculate Fay-Feur CIs for Age-Adjusted Death Rates
        .assign(Age_Adj_D_rate_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_D_var'])/(2*df_3['Age_Adj_D_rate']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_D_rate']**2)/df_3['Age_Adj_D_var']), #shape
                Age_Adj_D_rate_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_D_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_D_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_D_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_D_var']+df_3['w_max']**2)) #shape
            )
        #Calculate Age-Adjusted Survival Rates and CIs (as inverse of AA Death Rates and CIs)
        .assign(Age_Adj_S_rate = lambda df_4: 1-df_4['Age_Adj_D_rate'],
                Age_Adj_S_rate_CI_lo = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_hi'],
                Age_Adj_S_rate_CI_hi = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_lo'],
            )
        .drop(['Survived', 'Allocated', 'FN', 'FP', 'Exp_Surv', 'Lives_Saved', 'Pop_N', 'Deaths','Std_Pop','w_i', 'w_max', 'Age_Adj_D_var', 'Age_Adj_D_rate', 'Age_Adj_D_rate_CI_hi', 'Age_Adj_D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#df_test = get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Race']).reset_index()
#df_test = (pd.concat((get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])), axis=1).reset_index())

In [16]:
std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]}).astype({'Age_Group': 'category'})

std_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Age_Group  8 non-null      category
 1   Std_Pop    8 non-null      float64 
dtypes: category(1), float64(1)
memory usage: 568.0 bytes


In [4]:
from scipy.stats.distributions import chi2

#DEFINE Cormorbidity Adjusted YLL Calculator#
def get_Cho_YLS_stats(df_, groups, alpha=0.05):
    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_: df_['Cho_LE'].mask(df_['Survived'] == 1, 0), ## if survived then zero, otherwise retain life expectancy so that we sum to get YLLs.
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # if patient would have survived with ventilator then their Cho_LE x Capacity (e.g. 0.5), otherwise 0 (if deceased even with ventilator)
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), #return baseline*Cho_LE, then overwrite with 0 if allocated=1
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) #return allocated*Cho_LE, then overwrite with 0 if baseline=1
            )
        .groupby(groups, as_index=False)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"), #Total life expectancy for everyone, regardless of survival in 100% baseline
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who did survive in 100% baseline, multiplied by capacity (i.e. 0.5)
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"), #Total life expectancy (lost) of all those who did not survive (in simulation)
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who would have survived, but was not allocated (false negatives)
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #Total life expectancy for everyone who did NOT survive, but was allocated (false positives)
             )
        .assign(YLS_Cho = lambda df_1: (df_1['LE_Total_Cho']-df_1['YLL_Cho'])-df_1['Exp_LE_Cho'])
        .assign(YLS_Cho_rate = lambda df_2: df_2['YLS_Cho']/df_2['Pop_N'])
        .assign(YLS_Cho_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['YLS_Cho'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )),
                YLS_Cho_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['YLS_Cho']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    ))
            )
        #Calculate FNR and FPR (i.e. how many years of life lost to errors per patient was not allocated (FN) OR patient who was allocated (FP))
        .assign(FNR_LE_Cho = lambda df_0: df_0['FN_LE_Cho']/(df_0['Pop_N']-df_0['Allocated']),
                FPR_LE_Cho = lambda df_0: df_0['FP_LE_Cho']/df_0['Allocated']
            )
        #.drop(['Pop_N','Allocated'], axis=1)
        .round(4) #round all number to two decimal places
        .set_index(groups)
    )

#DEFINE Age-Adjusted Comordity-Adjusted YLL Calculator#
def get_age_adjusted_Cho_YLS_stats(df_, groups, alpha=0.05):
    
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})#.astype({'Age_Group': 'category'})

    if ('Age_Group' in groups): 
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_0: df_0['Cho_LE'].mask(df_0['Survived'] == 1, 0),  ## if survived then zero, otherwise retain life expectancy so that we sum to get YLLs.
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # Capacity x Cho_LE if patient would have survived with ventilator, otherwise 0 (if deceased even with ventilator)
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), #return baseline*Cho_LE, then overwrite with 0 if allocated=1
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) #return allocated*Cho_LE, then overwrite with 0 if baseline=1
            )
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"),
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"),
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"),
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who would have survived, but was not allocated (false negatives)
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #Total life expectancy for everyone who did NOT survive, but was allocated (false positives)
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0)) #zero out Std_Pop for each sub-group age-band where sub-group has no subjects (i.e. no AIAN in <25)
        .assign(w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N']) #calc pop weight for each Age_Group and Race (use max for Fay and Freur, and avg for Tiwari mod)
        .assign(w_max = lambda df_2: df_2.groupby(groups)['w_i'].transform('max')) #find max pop weight for Fay and Freur CIs (note use of transform, see here https://stackoverflow.com/questions/35640364/python-pandas-max-value-in-a-group-as-a-new-column
        #Calculate Age-Adjusted Cho YLL and Cho YLS
        .assign(Age_Adj_FNR_LE_Cho = lambda df_3: (df_3['FN_LE_Cho']/(df_3['Pop_N']-df_3['Allocated'])) * df_3['Std_Pop'], #first converts FN LE into an FNR of LE, then multiples by proportion of std pop in that group
                Age_Adj_FPR_LE_Cho = lambda df_3: (df_3['FP_LE_Cho']/df_3['Allocated']) * df_3['Std_Pop'],    
                Age_Adj_YLL_Cho_rate = lambda df_3: (df_3['YLL_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                Age_Adj_LE_Total_Cho_rate = lambda df_3: (df_3['LE_Total_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                Age_Adj_Exp_LE_Cho_rate = lambda df_3: (df_3['Exp_LE_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
            )
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .assign(w_max = lambda df_3: df_3['w_max']/len(df_.groupby('Age_Group').count())) #was 8 #divide sum of max pop weights by number of age-groups (to re-idnetify the max pop weight for Race)
        ##Calculate Age-Adj Years of Life Saved and CIs
        .assign(Age_Adj_YLS_Cho_rate = lambda df_0: (df_0['Age_Adj_LE_Total_Cho_rate']-df_0['Age_Adj_YLL_Cho_rate'])-df_0['Age_Adj_Exp_LE_Cho_rate'], #Calculate Raw Cho-adjusted Years of Life Saved for 'groups'
            )
        .drop(['Pop_N','Allocated','LE_Total_Cho','Exp_LE_Cho', 'YLL_Cho','FN_LE_Cho', 'FP_LE_Cho', 'Std_Pop', 'w_i', 'w_max',
               'Age_Adj_LE_Total_Cho_rate', 'Age_Adj_YLL_Cho_rate',  'Age_Adj_Exp_LE_Cho_rate',  
               #'Age_Adj_LE_Total_Cho_var', 'Age_Adj_YLL_Cho_var', 'Age_Adj_Exp_LE_Cho_var',
               ], axis=1)
        .round(4) #round all numbers to 4 decimals places
        .set_index(groups)
    )

#df_test = (pd.concat((get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])), axis=1).reset_index())

In [5]:

stats_overall_All = pd.concat([
    #get_raw_stats(df_baseline, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_Lott_All, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_Age_All, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_sofa_All, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_NY_All, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_Colorado_All, ['Capacity', 'Run','Protocol']),
    get_raw_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol'])
]).reset_index()

stats_race_All = pd.concat([
    #pd.concat([get_raw_stats(df_baseline, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_baseline, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Lott_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_Lott_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Age_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_Age_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_sofa_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_sofa_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_NY_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_NY_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Colorado_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_Colorado_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Bhavani_All, ['Capacity', 'Run', 'Protocol', 'Race']), get_age_adjusted_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol', 'Race'])], axis=1).reset_index()
])

stats_age_All = pd.concat([
    #pd.concat([get_raw_stats(df_baseline, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_baseline, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Lott_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_Lott_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Age_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_Age_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_sofa_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_sofa_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_NY_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_NY_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Colorado_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_Colorado_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol', 'Age_Group'])], axis=1).reset_index()
])

stats_COVID_All = pd.concat([
    #pd.concat([get_raw_stats(df_baseline, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_baseline, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Lott_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_Lott_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Age_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_Age_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_sofa_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_sofa_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_NY_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_NY_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Colorado_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_Colorado_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_Bhavani_All, ['Capacity', 'Run','Protocol', 'COVID_Status'])], axis=1).reset_index()
])

'''
##########################
# YLL Sheets #
##########################
stats_YLL_overall_All = pd.concat([
    get_raw_YLL(df_baseline, ['Capacity','Run','Protocol']),
    get_raw_YLL(df_Lott_All, ['Capacity','Run','Protocol']), 
    get_raw_YLL(df_Age_All, ['Capacity','Run','Protocol']), 
    get_raw_YLL(df_sofa_All, ['Capacity','Run','Protocol']), 
    get_raw_YLL(df_NY_All, ['Capacity','Run','Protocol']),
    get_raw_YLL(df_Colorado_All, ['Capacity','Run','Protocol']),
    get_raw_YLL(df_Bhavani_All, ['Capacity','Run','Protocol'])
]).reset_index()

stats_YLL_race_All = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_baseline, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Age_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_Age_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_NY_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_NY_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index()
])

stats_YLL_age_All = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_baseline, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Age_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_Age_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_NY_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_NY_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index()
])

stats_YLL_COVID_All = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_baseline, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_Lott_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Age_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_Age_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_sofa_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_NY_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_NY_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_Colorado_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_YLL(df_Bhavani_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index()
])
'''

##########################
# Cho YLL Sheets #
##########################
stats_Cho_overall_All = pd.concat([
    #get_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol']), 
    get_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol']),
    get_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol']),  
    get_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol']),  
    get_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol']),
    get_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol']),
    get_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol'])
]).reset_index()

stats_Cho_race_All = pd.concat([
    #pd.concat([get_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'Race'])], axis=1).reset_index()
])

stats_Cho_age_All = pd.concat([
   #pd.concat([get_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'Age_Group'])], axis=1).reset_index()
])

stats_Cho_COVID_All = pd.concat([
    #pd.concat([get_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_Lott_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_Age_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_sofa_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_NY_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_Colorado_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_Bhavani_All, ['Capacity','Run','Protocol', 'COVID_Status'])], axis=1).reset_index()
])

###Use below to convert Stats to excel sheets####

with pd.ExcelWriter("MC-Capacity-results-stats.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    stats_overall_All.to_excel(writer, sheet_name="Overall", index=False)
    stats_race_All.to_excel(writer, sheet_name="Race", index=False)
    stats_age_All.to_excel(writer, sheet_name="Age Group", index=False)
    stats_COVID_All.to_excel(writer, sheet_name="COVID Status", index=False)
#    stats_YLL_overall_All.to_excel(writer, sheet_name="YLL_Overall", index=False)
#    stats_YLL_race_All.to_excel(writer, sheet_name="YLL_Race", index=False)
#    stats_YLL_age_All.to_excel(writer, sheet_name="YLL_Age_Group", index=False)
#    stats_YLL_COVID_All.to_excel(writer, sheet_name="YLL_COVID_Status", index=False)
    stats_Cho_overall_All.to_excel(writer, sheet_name="Cho_Overall", index=False)
    stats_Cho_race_All.to_excel(writer, sheet_name="Cho_Race", index=False)
    stats_Cho_age_All.to_excel(writer, sheet_name="Cho_Age_Group", index=False)
    stats_Cho_COVID_All.to_excel(writer, sheet_name="Cho_COVID_Status", index=False)



## Basic Tables

### Survival Rates and Lives Saved by Capacity

In [2]:
stats_overall_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Overall')
stats_race_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Race')
stats_age_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Age Group')
stats_COVID_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='COVID Status')

#Overall Stats
print(stats_overall_All.groupby(['Protocol', 'Capacity']).mean())


                      Run   Pop_N   Exp_Surv  Allocated  Survived        FN  \
Protocol  Capacity                                                            
Age       0.05      125.5  3700.0   134.2486      185.0   145.188  2539.784   
          0.10      125.5  3700.0   268.4904      370.0   289.580  2395.324   
          0.15      125.5  3700.0   402.7296      555.0   434.780  2250.084   
          0.20      125.5  3700.0   536.9880      740.0   579.872  2105.068   
          0.25      125.5  3700.0   671.2450      925.0   724.416  1960.564   
...                   ...     ...        ...        ...       ...       ...   
Pure SOFA 0.80      125.5  3700.0  2147.9392     2960.0  2173.172   511.752   
          0.85      125.5  3700.0  2282.1752     3145.0  2300.704   384.208   
          0.90      125.5  3700.0  2416.5432     3330.0  2428.080   256.968   
          0.95      125.5  3700.0  2550.6132     3515.0  2556.592   128.264   
          1.00      125.5  3700.0  2684.8760     370

In [3]:
#Summary Stats Generator Capacity Levels (Change "Query")

print('-'*30, 'All Protocols - Raw Survival Rate')
stats_n_All = (stats_overall_All
    .query('Capacity == 0.5') ###CHANGE HERE
    .groupby(['Protocol'])['S_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
            ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem']
    )
    .drop(['std','sem'], axis=1)
    .round(3)   
)
print(stats_n_All)
print('-'*30)

print('-'*30, 'All Protocols - Lives Saved Rate')
stats_n_All = (stats_overall_All
    .query('Capacity == 0.5') ###CHANGE HERE
    .groupby(['Protocol'])['LS_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem']
    )
    .drop(['std','sem'], axis=1)
    .round(4)   
    .mul(1000) # remove to do per patient, rather than per 1000 patients
)
print(stats_n_All)
print('-'*30)

------------------------------ All Protocols - Raw Survival Rate
            mean  ci95_hi  ci95_lo
Protocol                          
Age        0.392    0.392    0.392
Bhavani    0.381    0.381    0.381
Colorado   0.377    0.378    0.377
Lottery    0.363    0.363    0.362
NY SOFA    0.376    0.376    0.375
Pure SOFA  0.379    0.380    0.379
------------------------------
------------------------------ All Protocols - Lives Saved Rate
           mean  ci95_lo  ci95_hi
Protocol                         
Age        29.2     28.8     29.5
Bhavani    18.1     17.7     18.5
Colorado   14.4     14.0     14.8
Lottery     0.1     -0.4      0.5
NY SOFA    12.9     12.5     13.3
Pure SOFA  16.4     16.0     16.8
------------------------------


### Life Years Saved by Capacity, Basic Table

In [4]:
stats_Cho_overall_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Cho_Overall')
stats_Cho_race_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Cho_Race')
stats_Cho_age_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Cho_Age_Group')
stats_Cho_COVID_All = pd.read_excel('MC-Capacity-results-stats.xlsx', sheet_name='Cho_COVID_Status')

#Overall Stats
print(stats_Cho_overall_All.groupby(['Protocol', 'Capacity']).mean())


                      Run   Pop_N  Allocated  LE_Total_Cho   Exp_LE_Cho  \
Protocol  Capacity                                                        
Age       0.05      125.5  3700.0      185.0    91080.6788   3516.07980   
          0.10      125.5  3700.0      370.0    91078.2008   7031.98012   
          0.15      125.5  3700.0      555.0    91074.4276  10547.50410   
          0.20      125.5  3700.0      740.0    91075.0164  14063.68880   
          0.25      125.5  3700.0      925.0    91075.1376  17579.25120   
...                   ...     ...        ...           ...          ...   
Pure SOFA 0.80      125.5  3700.0     2960.0    91075.8656  56253.90464   
          0.85      125.5  3700.0     3145.0    91077.2856  59770.37548   
          0.90      125.5  3700.0     3330.0    91075.6060  63287.93628   
          0.95      125.5  3700.0     3515.0    91076.9180  66801.86478   
          1.00      125.5  3700.0     3700.0    91077.0200  70317.02280   

                       Y

In [28]:
#Summary Stats Generator Capacity Levels (Change "Query")

print('-'*30, 'All Protocols - YLS Rate')
stats_Cho_n_All = (stats_Cho_overall_All
    .query('Capacity == 0.90') ###CHANGE HERE
    .groupby(['Protocol'])['YLS_Cho_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
    )
    .drop(['std','sem'], axis=1)
    .round(4)
    .mul(1000) # remove to do per patient, rather than per 1000 patients   
)
print(stats_Cho_n_All)
print('-'*30)

------------------------------ All Protocols - YLS Rate
            mean  ci95_lo  ci95_hi
Protocol                          
Age        681.2    674.2    688.1
Bhavani    298.7    289.6    307.9
Colorado   200.5    190.6    210.3
Lottery     27.5     17.4     37.7
NY SOFA     85.8     75.0     96.7
Pure SOFA  162.7    153.4    172.0
------------------------------
