# Preliminaries and Dataframe Construction

In [10]:
# Import modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

#Formatting
# Global matplotlib defaults applied to every figure created after this cell.
plt.rcParams.update({
    'font.family': 'Times New Roman',  # render figure text in Times New Roman
    'axes.grid': True,  # draw grid lines on every axes by default
})
# Use seaborn's 'Accent' palette as the default colour cycle
# (https://seaborn.pydata.org/tutorial/color_palettes.html).
_accent_palette = sns.color_palette('Accent')
sns.set_palette(_accent_palette)

In [None]:
#Import Encounters from Database Query
# NOTE(review): pd.read_pickle can execute arbitrary code stored in the file;
# only load "encounters.pkl" from a trusted source.
df_baseline = (pd.read_pickle("encounters.pkl")
    # Baseline = every patient allocated at full capacity in a single "run";
    # Baseline_Surv records whether the patient survived in that scenario.
    .assign(Run = 1, Capacity = 1, Allocated = 1, Baseline_Surv = lambda df_: df_['Survived'])
    .astype({'Age_Group': 'str'})
)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly rather than wrapped in print() (which would also print "None").
df_baseline.info()

# Per-encounter attributes that get attached to every simulation output below.
# Baseline_Surv already exists on df_baseline, so no re-assignment is needed.
df_demographics = df_baseline.reindex(columns = [
    'EncounterID',
    'Race',
    'Sex',
    'Age_Group',
    'COVID_Status',
    'Baseline_Surv',
    'LE',
    'Cho_LE',
])


def _load_protocol_runs(csv_path, protocol, on=('EncounterID',)):
    """Read one simulation CSV, label it with `protocol`, and attach demographics.

    EncounterID is read as a string; the merge with df_demographics is an
    inner join on the columns listed in `on`.
    """
    return (pd.read_csv(csv_path, converters={'EncounterID': str})
        .assign(Protocol = protocol)
        .merge(df_demographics, on=list(on))
    )


df_50_NY = _load_protocol_runs('MC_NY_50.csv', 'New York')
# NOTE(review): only the Age protocol output is merged on Age_Group as well --
# presumably that CSV carries its own Age_Group column; confirm against the files.
df_50_Age = _load_protocol_runs('MC_Age_50.csv', 'Age', on=('EncounterID', 'Age_Group'))
df_50_Lott = _load_protocol_runs('MC_Lott_50.csv', 'Lottery')
df_50_Maryland = _load_protocol_runs('MC_Maryland_50.csv', 'Maryland')
df_50_Colorado = _load_protocol_runs('MC_Colorado_50.csv', 'Colorado')
df_50_sofa = _load_protocol_runs('MC_sofa_50.csv', 'SOFA')
df_50_PA = _load_protocol_runs('MC_PA_50.csv', 'Pennsylvania')
df_50_Maryland_Age = _load_protocol_runs('MC_Maryland_Age_50.csv', 'Maryland_Age')


In [12]:
from scipy.stats.distributions import chi2

#DEFINE Raw Stats Calculator#
def get_raw_stats(df_, groups, alpha=0.05):
    """Aggregate patient-level rows into crude (unadjusted) statistics per group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Patient-level rows. Must contain 'Baseline_Surv', 'Capacity',
        'Allocated' and 'Survived' plus every column named in `groups`.
        Missing values are replaced with 0 before anything is computed.
    groups : list of str
        Columns to group by; they form the index of the result.
    alpha : float, default 0.05
        Two-sided significance level used for the chi-square based limits.

    Returns
    -------
    pandas.DataFrame
        Indexed by `groups` and rounded to 4 decimals, with columns Pop_N,
        Exp_Surv, Allocated, Survived, FN, FP, Lives_Saved, A_rate,
        A_rate_CI_lo, A_rate_CI_hi, S_rate, S_rate_CI_lo, S_rate_CI_hi,
        FN_rate, FP_rate and LS_rate.

    Notes
    -----
    The lower limit of a zero count is defined as 0. chi2.ppf returns NaN for
    zero degrees of freedom, so without that rule a group with no allocations
    (or no deaths) would get a NaN A_rate_CI_lo (or S_rate_CI_hi).
    FN_rate / FP_rate are still NaN or inf when their denominator is zero.
    """

    def _lower_limit(count):
        # 0.5 * chi2 quantile with 2*count degrees of freedom; zero counts are
        # patched to 0 because the quantile is undefined (NaN) for df == 0.
        raw = 0.5 * chi2.ppf(alpha / 2, 2 * count)
        return pd.Series(raw, index=count.index).where(count > 0, 0.0)

    def _upper_limit(count):
        # 0.5 * chi2 quantile with 2*(count+1) degrees of freedom (always > 0).
        return 0.5 * chi2.ppf(1 - alpha / 2, 2 * (count + 1))

    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], # each patient's baseline outcome (1 or 0) scaled by capacity (e.g. 0.5); summed later to give expected survivors
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), # baseline survivor who was NOT allocated
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) # allocated patient who did not survive at baseline
            )
        .groupby(groups, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        #Calculate Lives Saved and Allocation Rate
        .assign(Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv'],
                A_rate = lambda df_0: df_0['Allocated'] / df_0['Pop_N']
            )
        .assign(A_rate_CI_lo = lambda df_2: _lower_limit(df_2['Allocated']) / df_2['Pop_N'],
                A_rate_CI_hi = lambda df_2: _upper_limit(df_2['Allocated']) / df_2['Pop_N']
            )
        #Calculate Survival Rate (by first calculating death rate)
        .assign(Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'])
        .assign(D_rate = lambda df_1: df_1['Deaths'] / df_1['Pop_N'])
        .assign(D_rate_CI_lo = lambda df_2: _lower_limit(df_2['Deaths']) / df_2['Pop_N'],
                D_rate_CI_hi = lambda df_2: _upper_limit(df_2['Deaths']) / df_2['Pop_N']
            )
        # Survival limits are the complements of the death-rate limits (hi <-> lo swap).
        .assign(S_rate = lambda df_3: 1-df_3['D_rate'],
                S_rate_CI_lo = lambda df_3: 1-df_3['D_rate_CI_hi'],
                S_rate_CI_hi = lambda df_3: 1-df_3['D_rate_CI_lo'])
        #Calculate FNR, FPR and Lives Saved per Patient
        .assign(FN_rate = lambda df_0: df_0['FN']/(df_0['Pop_N']-df_0['Allocated']), # per non-allocated patient
                FP_rate = lambda df_0: df_0['FP']/df_0['Allocated'], # per allocated patient
                LS_rate = lambda df_0: df_0['Lives_Saved']/df_0['Pop_N']
            )
        #Cleanup: the death-rate columns were only intermediates
        .drop(['Deaths', 'D_rate', 'D_rate_CI_hi', 'D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#DEFINE Age-Adjusted Calculator#
def get_age_adjusted_stats(df_, groups, alpha=0.05):
    """Compute age-adjusted (directly standardised) rates per group.

    Rates are first computed inside every (group, Age_Group) cell, weighted by
    the standard-population share of that age band, and then summed over the
    age bands of each group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Patient-level rows. Must contain 'Age_Group', 'Baseline_Surv',
        'Capacity', 'Allocated' and 'Survived' plus every column in `groups`.
        Missing values are replaced with 0 first.
    groups : list of str
        Columns to group by; they form the index of the result. 'Age_Group'
        is added for the inner aggregation if it is not already listed.
    alpha : float, default 0.05
        Two-sided significance level for the survival-rate limits.

    Returns
    -------
    pandas.DataFrame
        Indexed by `groups` and rounded to 4 decimals, with columns
        Age_Adj_FN_rate, Age_Adj_FP_rate, Age_Adj_LS_rate, Age_Adj_S_rate,
        Age_Adj_S_rate_CI_lo and Age_Adj_S_rate_CI_hi.

    Notes
    -----
    * The merge with the standard population is an inner join, so rows whose
      Age_Group label is not one of the eight listed bands are discarded.
    * w_max (the largest per-band weight Std_Pop/Pop_N, used by the upper
      limit) is taken as a true per-group maximum. Summing the broadcast
      maximum and dividing by the number of age bands in the whole frame only
      gives the right value when every group contains every age band.
    * When a group has no deaths, the lower death-rate limit is 0/0; it is
      defined as 0 so that Age_Adj_S_rate_CI_hi is 1 rather than NaN.
    * Per-band FN/FP rates with a zero denominator are NaN and are skipped by
      the sum over age bands.
    """
    # Standard-population share per age band; the '<25' share is the sum of
    # four finer-grained values.
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    groups_age = groups if 'Age_Group' in groups else groups + ['Age_Group']

    per_age = (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], # baseline outcome (1 or 0) scaled by capacity (e.g. 0.5)
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), # baseline survivor who was NOT allocated
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) # allocated patient who did not survive at baseline
            )
        #population totals for each age band (additionally sliced by the other grouping variables)
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0), #zero the weight of any age band in which the sub-group has no subjects
                Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'],
                Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv']
            )
        #Weighted rate and variance contribution of each age band (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
        .assign(Age_Adj_D_rate = lambda df_1: (df_1['Deaths']/df_1['Pop_N']) * df_1['Std_Pop'],
                Age_Adj_D_var = lambda df_1: df_1['Deaths']*((df_1['Std_Pop']/df_1['Pop_N'])**2),
                Age_Adj_FN_rate = lambda df_1: (df_1['FN']/(df_1['Pop_N']-df_1['Allocated'])) * df_1['Std_Pop'],
                Age_Adj_FP_rate = lambda df_1: (df_1['FP']/df_1['Allocated']) * df_1['Std_Pop'],
                Age_Adj_LS_rate = lambda df_1: (df_1['Lives_Saved']/df_1['Pop_N']) * df_1['Std_Pop'],
            )
        #Per-band population weight; its per-group maximum feeds the upper limit
        .assign(w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N'])
    )

    # Collapse the age bands: every numeric column is summed, except w_max,
    # which must be the largest w_i within the group.
    how = {col: 'sum' for col in per_age.select_dtypes('number').columns if col not in groups}
    how['w_max'] = 'max'

    return (per_age
        .assign(w_max = lambda df_2: df_2['w_i'])
        .groupby(groups, as_index=False)
        .agg(how)
        ## Gamma-based limits for the age-adjusted death rate (the Fay-Feuer method, per the SEER reference above)
        .assign(Age_Adj_D_rate_CI_lo = lambda df_3: (
                    (df_3['Age_Adj_D_var'])/(2*df_3['Age_Adj_D_rate']) *
                    chi2.ppf(alpha/2,
                        (2*df_3['Age_Adj_D_rate']**2)/df_3['Age_Adj_D_var']) #shape
                    ).where(df_3['Age_Adj_D_rate'] > 0, 0.0), #no deaths -> lower limit defined as 0
                Age_Adj_D_rate_CI_hi = lambda df_3:
                    ((df_3['Age_Adj_D_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_D_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2,
                        (2*(df_3['Age_Adj_D_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_D_var']+df_3['w_max']**2)) #shape
            )
        #Survival rate and limits are the complements of the death-rate figures (hi <-> lo swap)
        .assign(Age_Adj_S_rate = lambda df_4: 1-df_4['Age_Adj_D_rate'],
                Age_Adj_S_rate_CI_lo = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_hi'],
                Age_Adj_S_rate_CI_hi = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_lo'],
            )
        .drop(['Survived', 'Allocated', 'FN', 'FP', 'Exp_Surv', 'Lives_Saved', 'Pop_N', 'Deaths','Std_Pop','w_i', 'w_max', 'Age_Adj_D_var', 'Age_Adj_D_rate', 'Age_Adj_D_rate_CI_hi', 'Age_Adj_D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

In [None]:
from scipy.stats.distributions import chi2

#DEFINE Cormorbidity Adjusted YLL Calculator#
def get_Cho_YLS_stats(df_, groups, alpha=0.05):
    """Aggregate comorbidity-adjusted (Cho) life-expectancy statistics per group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Patient-level rows. Must contain 'Cho_LE', 'Survived',
        'Baseline_Surv', 'Capacity' and 'Allocated' plus every column in
        `groups`. Missing values are replaced with 0 first.
    groups : list of str
        Columns to group by; they form the index of the result.
    alpha : float, default 0.05
        Two-sided significance level for the chi-square based limits.

    Returns
    -------
    pandas.DataFrame
        Indexed by `groups` and rounded to 4 decimals, with columns Pop_N,
        Allocated, LE_Total_Cho, Exp_LE_Cho, YLL_Cho, FN_LE_Cho, FP_LE_Cho,
        YLS_Cho, YLS_Cho_rate, YLS_Cho_CI_lo, YLS_Cho_CI_hi, FNR_LE_Cho and
        FPR_LE_Cho.

    Notes
    -----
    YLS_Cho_CI_lo is defined as 0 when YLS_Cho is exactly 0 (chi2.ppf is NaN
    for zero degrees of freedom). A negative YLS_Cho still yields NaN limits,
    because the chi-square quantile is undefined for a negative shape.
    """
    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_: df_['Cho_LE'].mask(df_['Survived'] == 1, 0), # life expectancy lost: Cho_LE for non-survivors, 0 for survivors
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # Cho_LE x Capacity for baseline survivors, 0 otherwise
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), # Cho_LE of baseline survivors who were NOT allocated
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) # Cho_LE of allocated patients who did not survive at baseline
            )
        .groupby(groups, as_index=False)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"), #total life expectancy of everyone in the group
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"), #capacity-scaled life expectancy of baseline survivors
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"), #life expectancy lost by those who did not survive the simulation
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #life expectancy tied to false negatives
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #life expectancy tied to false positives
             )
        #Cho YLS: life-years retained by survivors, over and above the expected Exp_LE_Cho
        .assign(YLS_Cho = lambda df_1: (df_1['LE_Total_Cho']-df_1['YLL_Cho'])-df_1['Exp_LE_Cho'])
        .assign(YLS_Cho_rate = lambda df_2: df_2['YLS_Cho']/df_2['Pop_N'])
        .assign(YLS_Cho_CI_lo = lambda df_2: pd.Series(
                    0.5*chi2.ppf(alpha/2, 2*df_2['YLS_Cho']),
                    index=df_2.index
                    ).where(df_2['YLS_Cho'] != 0, 0.0), #zero shape -> lower limit defined as zero
                YLS_Cho_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2,
                    2*(df_2['YLS_Cho']+1) #shape
                    ))
            )
        #FNR / FPR in life-years: per non-allocated patient (FN) and per allocated patient (FP)
        .assign(FNR_LE_Cho = lambda df_0: df_0['FN_LE_Cho']/(df_0['Pop_N']-df_0['Allocated']),
                FPR_LE_Cho = lambda df_0: df_0['FP_LE_Cho']/df_0['Allocated']
            )
        .round(4) #round all numbers to 4 decimal places
        .set_index(groups)
    )

#DEFINE Age-Adjusted Comordity-Adjusted YLL Calculator#
def get_age_adjusted_Cho_YLS_stats(df_, groups, alpha=0.05):
    """Compute age-adjusted comorbidity-adjusted (Cho) life-year rates per group.

    Per-patient life-year rates are computed inside every (group, Age_Group)
    cell, weighted by the standard-population share of that age band, and
    summed over the age bands of each group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Patient-level rows. Must contain 'Age_Group', 'Cho_LE', 'Survived',
        'Baseline_Surv', 'Capacity' and 'Allocated' plus every column in
        `groups`. Missing values are replaced with 0 first.
    groups : list of str
        Columns to group by; they form the index of the result. 'Age_Group'
        is added for the inner aggregation if it is not already listed.
    alpha : float, default 0.05
        Accepted for signature parity with the other calculators; no
        confidence limits are computed here, so it is not used.

    Returns
    -------
    pandas.DataFrame
        Indexed by `groups` and rounded to 4 decimals, with columns
        Age_Adj_FNR_LE_Cho, Age_Adj_FPR_LE_Cho and Age_Adj_YLS_Cho_rate.

    Notes
    -----
    The merge with the standard population is an inner join, so rows whose
    Age_Group label is not one of the eight listed bands are discarded.
    Per-band rates with a zero denominator are NaN and are skipped by the sum
    over age bands. No per-band weights (w_i / w_max) are computed, because
    nothing in this function consumes them.
    """
    # Standard-population share per age band; the '<25' share is the sum of
    # four finer-grained values.
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    groups_age = groups if 'Age_Group' in groups else groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_0: df_0['Cho_LE'].mask(df_0['Survived'] == 1, 0),  # life expectancy lost: Cho_LE for non-survivors, 0 for survivors
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # Cho_LE x Capacity for baseline survivors, 0 otherwise
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), # Cho_LE of baseline survivors who were NOT allocated
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) # Cho_LE of allocated patients who did not survive at baseline
            )
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"),
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"),
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"),
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #life expectancy tied to false negatives
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #life expectancy tied to false positives
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0)) #zero the weight of any age band in which the sub-group has no subjects
        #Per-band rate x standard-population weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
        .assign(Age_Adj_FNR_LE_Cho = lambda df_3: (df_3['FN_LE_Cho']/(df_3['Pop_N']-df_3['Allocated'])) * df_3['Std_Pop'], #life-years per non-allocated patient
                Age_Adj_FPR_LE_Cho = lambda df_3: (df_3['FP_LE_Cho']/df_3['Allocated']) * df_3['Std_Pop'], #life-years per allocated patient
                Age_Adj_YLL_Cho_rate = lambda df_3: (df_3['YLL_Cho']/df_3['Pop_N']) * df_3['Std_Pop'],
                Age_Adj_LE_Total_Cho_rate = lambda df_3: (df_3['LE_Total_Cho']/df_3['Pop_N']) * df_3['Std_Pop'],
                Age_Adj_Exp_LE_Cho_rate = lambda df_3: (df_3['Exp_LE_Cho']/df_3['Pop_N']) * df_3['Std_Pop'],
                )
        #Collapse the age bands
        .groupby(groups, as_index=False).sum(numeric_only=True)
        ##Age-adjusted years of life saved per patient: (total - lost) - expected
        .assign(Age_Adj_YLS_Cho_rate = lambda df_0: (df_0['Age_Adj_LE_Total_Cho_rate']-df_0['Age_Adj_YLL_Cho_rate'])-df_0['Age_Adj_Exp_LE_Cho_rate'],
                )
        .drop(['Pop_N','Allocated','LE_Total_Cho','Exp_LE_Cho', 'YLL_Cho','FN_LE_Cho', 'FP_LE_Cho', 'Std_Pop',
               'Age_Adj_LE_Total_Cho_rate', 'Age_Adj_YLL_Cho_rate',  'Age_Adj_Exp_LE_Cho_rate',
               ], axis=1)
        .round(4) #round all numbers to 4 decimal places
        .set_index(groups)
    )

#Generators

In [14]:
##################
# Survival Rates #
##################
# Patient-level frames, in the order their rows appear in each output sheet.
_frames_50 = [df_baseline, df_50_Lott, df_50_Age, df_50_sofa, df_50_NY,
              df_50_Colorado, df_50_Maryland, df_50_Maryland_Age, df_50_PA]
# The race sheet lists New York ahead of SOFA; kept so its row order is unchanged.
_frames_50_race = [df_baseline, df_50_Lott, df_50_Age, df_50_NY, df_50_sofa,
                   df_50_Colorado, df_50_Maryland, df_50_Maryland_Age, df_50_PA]


def _stack_overall(frames, crude_fn):
    """Stack the crude per-(Protocol, Run) table of every frame, then flatten the index."""
    return pd.concat([crude_fn(frame, ['Protocol', 'Run']) for frame in frames]).reset_index()


def _stack_by_stratum(frames, crude_fn, adjusted_fn, stratum):
    """For each frame, put crude and age-adjusted columns side by side, then stack the frames."""
    tables = []
    for frame in frames:
        side_by_side = pd.concat(
            [crude_fn(frame, ['Protocol', 'Run', stratum]),
             adjusted_fn(frame, ['Protocol', 'Run', stratum])],
            axis=1,
        )
        tables.append(side_by_side.reset_index())
    return pd.concat(tables)


stats_overall_50 = _stack_overall(_frames_50, get_raw_stats)
stats_race_50 = _stack_by_stratum(_frames_50_race, get_raw_stats, get_age_adjusted_stats, 'Race')
stats_age_50 = _stack_by_stratum(_frames_50, get_raw_stats, get_age_adjusted_stats, 'Age_Group')
stats_COVID_50 = _stack_by_stratum(_frames_50, get_raw_stats, get_age_adjusted_stats, 'COVID_Status')


##########################
# Cho YLL Sheets #
##########################
stats_Cho_overall_50 = _stack_overall(_frames_50, get_Cho_YLS_stats)
stats_Cho_race_50 = _stack_by_stratum(_frames_50, get_Cho_YLS_stats, get_age_adjusted_Cho_YLS_stats, 'Race')
stats_Cho_age_50 = _stack_by_stratum(_frames_50, get_Cho_YLS_stats, get_age_adjusted_Cho_YLS_stats, 'Age_Group')
stats_Cho_COVID_50 = _stack_by_stratum(_frames_50, get_Cho_YLS_stats, get_age_adjusted_Cho_YLS_stats, 'COVID_Status')

###Use below to convert Stats to excel sheets####

# Sheet name -> table, written in this order and without the index column.
_sheets_50 = {
    "Overall": stats_overall_50,
    "Race": stats_race_50,
    "Age Group": stats_age_50,
    "COVID Status": stats_COVID_50,
    "Cho_Overall": stats_Cho_overall_50,
    "Cho_Race": stats_Cho_race_50,
    "Cho_Age_Group": stats_Cho_age_50,
    "Cho_COVID_Status": stats_Cho_COVID_50,
}
with pd.ExcelWriter("MC-50-results-stats.xlsx") as writer:
    for _sheet_name, _table in _sheets_50.items():
        _table.to_excel(writer, sheet_name=_sheet_name, index=False)

# AGGREGATION OF RUNS and Summary Statistics

In [16]:
#####
#Extract processed results from MC-50-results-stats
#####

# Each sheet written by the export cell is read back into the variable of the
# same name, one read_excel call per sheet.
(
    stats_overall_50,
    stats_race_50,
    stats_age_50,
    stats_COVID_50,
    stats_Cho_overall_50,
    stats_Cho_race_50,
    stats_Cho_age_50,
    stats_Cho_COVID_50,
) = (
    pd.read_excel('MC-50-results-stats.xlsx', sheet_name=_sheet)
    for _sheet in (
        'Overall',
        'Race',
        'Age Group',
        'COVID Status',
        "Cho_Overall",
        "Cho_Race",
        "Cho_Age_Group",
        "Cho_COVID_Status",
    )
)

## Table 2 - Overall Survival Rate, Allocation by Race, and Age-Adjusted Survival by Race

In [15]:
#Alternative way of deriving CIs over the parameters derived in the runs - i.e. traditional CIs across the rates in each run.

def get_CIs_rates(df_, groups, z=1.96):
    """Average the per-run rates within each group and attach confidence intervals.

    Rows of ``df_`` are grouped by ``groups``. For every group the function
    reports the number of rows (``Run``), the means of ``Pop_N``, ``Survived``,
    ``Allocated``, ``A_rate`` and ``S_rate``, and a normal-approximation
    interval ``mean -/+ z * SEM`` for ``A_rate`` and ``S_rate``, where SEM is
    the pandas groupby ``sem`` of that column.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``Survived``, ``Allocated``, ``A_rate`` and ``S_rate``
        (presumably one row per simulation run and group -- confirm against
        the upstream stats tables).
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group with the group columns followed by ``Run``,
        ``Pop_N``, ``Survived``, ``Allocated``, ``A_rate``, ``A_rate_CI_lo``,
        ``A_rate_CI_hi``, ``S_rate``, ``S_rate_CI_lo``, ``S_rate_CI_hi``.
        Numeric values are rounded to four decimal places.
    """
    summary = (
        df_
        .groupby(groups, as_index=True)
        .agg(
            Run=("Run", "count"),
            Pop_N=("Pop_N", "mean"),
            Survived=("Survived", "mean"),
            Allocated=("Allocated", "mean"),
            A_rate=("A_rate", "mean"),
            A_sem=("A_rate", "sem"),
            S_rate=("S_rate", "mean"),
            S_sem=("S_rate", "sem"),
        )
        .reset_index()
    )

    # Intervals are built from the unrounded mean and SEM; rounding happens last.
    for mean_col, sem_col in (("A_rate", "A_sem"), ("S_rate", "S_sem")):
        margin = z * summary[sem_col]
        summary[mean_col + "_CI_lo"] = summary[mean_col] - margin
        summary[mean_col + "_CI_hi"] = summary[mean_col] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # Keeps only the reported columns (drops the helper SEM columns).
        .reindex(columns=['Run', 'Pop_N', 'Survived', 'Allocated',
                          'A_rate', 'A_rate_CI_lo', 'A_rate_CI_hi',
                          'S_rate', 'S_rate_CI_lo', 'S_rate_CI_hi'])
        .reset_index()
    )

def get_CIs_rates_with_AA(df_, groups, z=1.96):
    """Like ``get_CIs_rates`` but also summarises the ``Age_Adj_S_rate`` column.

    Rows of ``df_`` are grouped by ``groups``. For each group the function
    reports the row count (``Run``), the means of ``Pop_N``, ``Survived`` and
    ``Allocated``, and for each of ``A_rate``, ``S_rate`` and
    ``Age_Adj_S_rate`` the mean plus a normal-approximation interval
    ``mean -/+ z * SEM`` (SEM from the pandas groupby ``sem``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``Survived``, ``Allocated``, ``A_rate``, ``S_rate`` and
        ``Age_Adj_S_rate``.
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group: the group columns, then ``Run``, ``Pop_N``,
        ``Survived``, ``Allocated`` and, for each of the three rates, the mean
        followed by ``<rate>_CI_lo`` and ``<rate>_CI_hi``. Numeric values are
        rounded to four decimal places.
    """
    rate_metrics = ("A_rate", "S_rate", "Age_Adj_S_rate")

    agg_spec = {
        "Run": ("Run", "count"),
        "Pop_N": ("Pop_N", "mean"),
        "Survived": ("Survived", "mean"),
        "Allocated": ("Allocated", "mean"),
    }
    for metric in rate_metrics:
        agg_spec[metric] = (metric, "mean")
        agg_spec[metric + "_sem"] = (metric, "sem")  # helper, dropped below

    summary = df_.groupby(groups, as_index=True).agg(**agg_spec).reset_index()

    # Intervals are built from the unrounded mean and SEM; rounding happens last.
    for metric in rate_metrics:
        margin = z * summary[metric + "_sem"]
        summary[metric + "_CI_lo"] = summary[metric] - margin
        summary[metric + "_CI_hi"] = summary[metric] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # Keeps only the reported columns (drops the helper SEM columns).
        .reindex(columns=['Run', 'Pop_N', 'Survived', 'Allocated',
                          'A_rate', 'A_rate_CI_lo', 'A_rate_CI_hi',
                          'S_rate', 'S_rate_CI_lo', 'S_rate_CI_hi',
                          'Age_Adj_S_rate', 'Age_Adj_S_rate_CI_lo', 'Age_Adj_S_rate_CI_hi'])
        .reset_index()
    )

def get_CIs_Cho(df_, groups, z=1.96):
    """Average ``YLL_Cho`` within each group and attach a confidence interval.

    Rows of ``df_`` are grouped by ``groups``. For each group the function
    reports the row count (``Run``), the means of ``Pop_N``, ``LE_Total_Cho``
    and ``YLL_Cho``, and the interval ``mean -/+ z * SEM`` for ``YLL_Cho``
    (SEM from the pandas groupby ``sem``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``LE_Total_Cho`` and ``YLL_Cho``.
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group: the group columns, then ``Run``, ``Pop_N``,
        ``LE_Total_Cho``, ``YLL_Cho``, ``YLL_Cho_CI_lo``, ``YLL_Cho_CI_hi``.
        Numeric values are rounded to four decimal places.
    """
    summary = (
        df_
        .groupby(groups, as_index=True)
        .agg(
            Run=("Run", "count"),
            Pop_N=("Pop_N", "mean"),
            LE_Total_Cho=("LE_Total_Cho", "mean"),
            YLL_Cho=("YLL_Cho", "mean"),
            YLL_Cho_sem=("YLL_Cho", "sem"),
        )
        .reset_index()
    )

    # Interval is built from the unrounded mean and SEM; rounding happens last.
    margin = z * summary["YLL_Cho_sem"]
    summary["YLL_Cho_CI_lo"] = summary["YLL_Cho"] - margin
    summary["YLL_Cho_CI_hi"] = summary["YLL_Cho"] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # Keeps only the reported columns (drops the helper SEM column).
        .reindex(columns=['Run', 'Pop_N', 'LE_Total_Cho',
                          'YLL_Cho', 'YLL_Cho_CI_lo', 'YLL_Cho_CI_hi'])
        .reset_index()
    )

## Age adjustment is currently removed here since it is hard to interpret in this context.
def get_CIs_Cho_with_AA(df_, groups, z=1.96):
    """Summarise ``YLL_Cho`` per group, keeping empty age-adjusted columns.

    Rows of ``df_`` are grouped by ``groups``. For each group the function
    reports the row count (``Run``), the means of ``Pop_N``, ``LE_Total_Cho``
    and ``YLL_Cho``, and the interval ``mean -/+ z * SEM`` for ``YLL_Cho``
    (SEM from the pandas groupby ``sem``).

    No age-adjusted statistic is computed here. The three
    ``Age_Adj_YLL_Cho_rate*`` columns are still requested in the final
    reindex, so they appear in the result filled with NaN.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``LE_Total_Cho`` and ``YLL_Cho``.
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group: the group columns, then ``Run``, ``Pop_N``,
        ``LE_Total_Cho``, ``YLL_Cho``, ``YLL_Cho_CI_lo``, ``YLL_Cho_CI_hi``
        and the three all-NaN ``Age_Adj_YLL_Cho_rate*`` columns. Numeric
        values are rounded to four decimal places.
    """
    summary = (
        df_
        .groupby(groups, as_index=True)
        .agg(
            Run=("Run", "count"),
            Pop_N=("Pop_N", "mean"),
            LE_Total_Cho=("LE_Total_Cho", "mean"),
            YLL_Cho=("YLL_Cho", "mean"),
            YLL_Cho_sem=("YLL_Cho", "sem"),
        )
        .reset_index()
    )

    # Interval is built from the unrounded mean and SEM; rounding happens last.
    margin = z * summary["YLL_Cho_sem"]
    summary["YLL_Cho_CI_lo"] = summary["YLL_Cho"] - margin
    summary["YLL_Cho_CI_hi"] = summary["YLL_Cho"] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # The Age_Adj_* names do not exist in `summary`; reindex creates them
        # as NaN placeholders so the output layout stays unchanged.
        .reindex(columns=['Run', 'Pop_N', 'LE_Total_Cho',
                          'YLL_Cho', 'YLL_Cho_CI_lo', 'YLL_Cho_CI_hi',
                          'Age_Adj_YLL_Cho_rate', 'Age_Adj_YLL_Cho_rate_CI_lo', 'Age_Adj_YLL_Cho_rate_CI_hi'])
        .reset_index()
    )


# Table 2 workbook. Every sheet stacks the untouched 'Baseline' rows of a stats
# table on top of the across-run summary of all non-baseline rows.
with pd.ExcelWriter("MC-50-Table_2_Allocation_Survival.xlsx") as writer:
    # (sheet name, stats table, summary function, grouping columns), in write order.
    _table_2_jobs = (
        ("Overall", stats_overall_50, get_CIs_rates, ['Protocol']),
        ("Race", stats_race_50, get_CIs_rates_with_AA, ['Protocol', 'Race']),
        ("Age_Group", stats_age_50, get_CIs_rates_with_AA, ['Protocol', 'Age_Group']),
        ("COVID_Status", stats_COVID_50, get_CIs_rates_with_AA, ['Protocol', 'COVID_Status']),
        ("Cho_Overall", stats_Cho_overall_50, get_CIs_Cho, ['Protocol']),
        ("Cho_Race", stats_Cho_race_50, get_CIs_Cho_with_AA, ['Protocol', 'Race']),
        ("Cho_Age_Group", stats_Cho_age_50, get_CIs_Cho_with_AA, ['Protocol', 'Age_Group']),
        ("Cho_COVID_Status", stats_Cho_COVID_50, get_CIs_Cho_with_AA, ['Protocol', 'COVID_Status']),
    )
    for _t2_sheet, _t2_stats, _t2_summarise, _t2_keys in _table_2_jobs:
        _t2_baseline_rows = _t2_stats[_t2_stats['Protocol'] == 'Baseline']
        _t2_protocol_rows = _t2_stats[_t2_stats.Protocol != 'Baseline']
        pd.concat(
            [_t2_baseline_rows, _t2_summarise(_t2_protocol_rows, _t2_keys)]
        ).to_excel(writer, sheet_name=_t2_sheet, index=False)

## Table 3 - Lives Saved and Years of Life Saved

In [16]:
def get_LS(df_, groups, z=1.96):
    """Average ``Lives_Saved`` and ``LS_rate`` per group with confidence intervals.

    Rows of ``df_`` are grouped by ``groups``. For each group the function
    reports the row count (``Run``), the means of ``Pop_N``, ``Allocated``,
    ``Survived``, ``Exp_Surv``, ``Lives_Saved`` and ``LS_rate``, and the
    interval ``mean -/+ z * SEM`` for ``Lives_Saved`` and ``LS_rate`` (SEM
    from the pandas groupby ``sem``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``Allocated``, ``Survived``, ``Exp_Surv``, ``Lives_Saved`` and
        ``LS_rate``.
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group: the group columns, then ``Run``, ``Pop_N``,
        ``Allocated``, ``Survived``, ``Exp_Surv``, ``Lives_Saved``,
        ``LS_CI_lo``, ``LS_CI_hi``, ``LS_rate``, ``LS_rate_CI_lo``,
        ``LS_rate_CI_hi``. Numeric values are rounded to four decimal places.
    """
    summary = (
        df_
        .groupby(groups, as_index=True)
        .agg(
            Run=("Run", "count"),
            Pop_N=("Pop_N", "mean"),
            Allocated=("Allocated", "mean"),
            Survived=("Survived", "mean"),
            Exp_Surv=("Exp_Surv", "mean"),
            Lives_Saved=("Lives_Saved", "mean"),
            LS_sem=("Lives_Saved", "sem"),
            LS_rate=("LS_rate", "mean"),
            LS_rate_sem=("LS_rate", "sem"),
        )
        .reset_index()
    )

    # (mean column, SEM column, prefix of the CI columns). Intervals are built
    # from the unrounded mean and SEM; rounding happens last.
    interval_specs = (
        ("Lives_Saved", "LS_sem", "LS"),
        ("LS_rate", "LS_rate_sem", "LS_rate"),
    )
    for mean_col, sem_col, ci_prefix in interval_specs:
        margin = z * summary[sem_col]
        summary[ci_prefix + "_CI_lo"] = summary[mean_col] - margin
        summary[ci_prefix + "_CI_hi"] = summary[mean_col] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # Keeps only the reported columns (drops the helper SEM columns).
        .reindex(columns=['Run', 'Pop_N', 'Allocated', 'Survived', 'Exp_Surv',
                          'Lives_Saved', 'LS_CI_lo', 'LS_CI_hi',
                          'LS_rate', 'LS_rate_CI_lo', 'LS_rate_CI_hi'])
        .reset_index()
    )

def get_YLS_Cho(df_, groups, z=1.96):
    """Average ``YLS_Cho`` and ``YLS_Cho_rate`` per group with confidence intervals.

    Rows of ``df_`` are grouped by ``groups``. For each group the function
    reports the row count (``Run``), the means of ``Pop_N``, ``LE_Total_Cho``,
    ``Exp_LE_Cho``, ``YLS_Cho`` and ``YLS_Cho_rate``, and the interval
    ``mean -/+ z * SEM`` for ``YLS_Cho`` and ``YLS_Cho_rate`` (SEM from the
    pandas groupby ``sem``).

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain the ``groups`` columns plus ``Run``, ``Pop_N``,
        ``LE_Total_Cho``, ``Exp_LE_Cho``, ``YLS_Cho`` and ``YLS_Cho_rate``.
    groups : list of str
        Column name(s) to group by.
    z : float, default 1.96
        Multiplier applied to the SEM; 1.96 gives the usual 95% interval.

    Returns
    -------
    pandas.DataFrame
        One row per group: the group columns, then ``Run``, ``Pop_N``,
        ``LE_Total_Cho``, ``Exp_LE_Cho``, ``YLS_Cho``, ``YLS_Cho_CI_lo``,
        ``YLS_Cho_CI_hi``, ``YLS_Cho_rate``, ``YLS_Cho_rate_CI_lo``,
        ``YLS_Cho_rate_CI_hi``. Numeric values are rounded to four decimal
        places.
    """
    ci_metrics = ("YLS_Cho", "YLS_Cho_rate")

    summary = (
        df_
        .groupby(groups, as_index=True)
        .agg(
            Run=("Run", "count"),
            Pop_N=("Pop_N", "mean"),
            LE_Total_Cho=("LE_Total_Cho", "mean"),
            Exp_LE_Cho=("Exp_LE_Cho", "mean"),
            YLS_Cho=("YLS_Cho", "mean"),
            YLS_Cho_sem=("YLS_Cho", "sem"),
            YLS_Cho_rate=("YLS_Cho_rate", "mean"),
            YLS_Cho_rate_sem=("YLS_Cho_rate", "sem"),
        )
        .reset_index()
    )

    # Intervals are built from the unrounded mean and SEM; rounding happens last.
    for metric in ci_metrics:
        margin = z * summary[metric + "_sem"]
        summary[metric + "_CI_lo"] = summary[metric] - margin
        summary[metric + "_CI_hi"] = summary[metric] + margin

    return (
        summary
        .round(4)  # round all numbers to four decimal places
        .set_index(groups)
        # Keeps only the reported columns (drops the helper SEM columns).
        .reindex(columns=['Run', 'Pop_N', 'LE_Total_Cho', 'Exp_LE_Cho',
                          'YLS_Cho', 'YLS_Cho_CI_lo', 'YLS_Cho_CI_hi',
                          'YLS_Cho_rate', 'YLS_Cho_rate_CI_lo', 'YLS_Cho_rate_CI_hi'])
        .reset_index()
    )
    
# Table 3 workbook. Every sheet holds the across-run summary of the
# non-baseline rows of one stats table.
with pd.ExcelWriter("MC-50-Table_3_LS_and_YLS.xlsx") as writer:
    # (summary function, sheet name, stats table, grouping columns), in write order.
    _table_3_jobs = (
        (get_LS, "Overall", stats_overall_50, ['Protocol']),
        (get_LS, "Race", stats_race_50, ['Protocol', 'Race']),
        (get_LS, "Age_Group", stats_age_50, ['Protocol', 'Age_Group']),
        (get_LS, "COVID_Status", stats_COVID_50, ['Protocol', 'COVID_Status']),
        (get_YLS_Cho, "Cho_Overall", stats_Cho_overall_50, ['Protocol']),
        (get_YLS_Cho, "Cho_Race", stats_Cho_race_50, ['Protocol', 'Race']),
        (get_YLS_Cho, "Cho_Age_Group", stats_Cho_age_50, ['Protocol', 'Age_Group']),
        (get_YLS_Cho, "Cho_COVID_Status", stats_Cho_COVID_50, ['Protocol', 'COVID_Status']),
    )
    for _t3_summarise, _t3_sheet, _t3_stats, _t3_keys in _table_3_jobs:
        _t3_protocol_rows = _t3_stats[_t3_stats.Protocol != 'Baseline']
        _t3_summarise(_t3_protocol_rows, _t3_keys).to_excel(writer, sheet_name=_t3_sheet, index=False)