# Preliminaries and Dataframe Construction

In [None]:
# Import modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

#Formatting
plt.rcParams['font.family'] = 'Times New Roman'  # Set plt shows font to Times New Roman
plt.rcParams['axes.grid'] = True  # Ensure line graphs display on graphs
sns.set_palette(sns.color_palette('Accent')) #set color palette to a nice seaborn style https://seaborn.pydata.org/tutorial/color_palettes.html

: 

In [2]:
#Import Encounters from Database Query
df_baseline = pd.read_pickle("encounters.pkl").assign(Run = 1, Capacity = 1, Allocated = 1, Baseline_Surv = lambda df_: df_['Survived'])
print(df_baseline.info())

df_demographics = (df_baseline
    .assign(Baseline_Surv = lambda df_baseline: df_baseline['Survived']) #create column for whether individual survived at 100% capacity / with ventilator support
    .reindex(columns = [
    'EncounterID',
    'Race',
    'Sex',
    'Age_Group',
    'COVID_Status',
    'Baseline_Surv',
    'LE',
    'Cho_LE']
    )
)

df_50_NY = pd.read_csv('MC_NY_50.csv', converters={'EncounterID':str}).assign(Protocol = 'NY SOFA').merge(df_demographics, on=['EncounterID'])
df_50_Age = pd.read_csv('MC_Age_50.csv', converters={'EncounterID':str}).assign(Protocol = 'Age').merge(df_demographics, on=['EncounterID', 'Age_Group'])
df_50_Lott = pd.read_csv('MC_Lott_50.csv', converters={'EncounterID':str}).assign(Protocol = 'Lottery').merge(df_demographics, on=['EncounterID'])
df_50_Bhavani = pd.read_csv('MC_Bhavani_50.csv', converters={'EncounterID':str}).assign(Protocol = 'Bhavani').merge(df_demographics, on=['EncounterID'])
df_50_Colorado = pd.read_csv('MC_Colorado_50.csv', converters={'EncounterID':str}).assign(Protocol = 'Colorado').merge(df_demographics, on=['EncounterID'])
df_50_sofa = pd.read_csv('MC_sofa_50.csv', converters={'EncounterID':str}).assign(Protocol = 'Pure SOFA').merge(df_demographics, on=['EncounterID'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3707 entries, 0 to 3706
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   EncounterID       3707 non-null   object  
 1   SubjectID         3707 non-null   object  
 2   Age               3707 non-null   float64 
 3   Race              3707 non-null   object  
 4   Ethnicity         3707 non-null   object  
 5   Sex               3707 non-null   object  
 6   InitialSOFA       3707 non-null   int64   
 7   StayLength        3707 non-null   float64 
 8   LE                3707 non-null   float64 
 9   Cho_LE            3707 non-null   float64 
 10  COVID_Status      3707 non-null   int8    
 11  Discharge_Status  3707 non-null   int64   
 12  Intubation        3707 non-null   int64   
 13  NY_Score          3707 non-null   int8    
 14  Bhavani_Score     3707 non-null   int64   
 15  Colorado_Score    3707 non-null   int64   
 16  Protocol          3707 n

TO-DO
- Create list of columns for neat aggregation of Raw and Age-Adjusted Stats
- Create FPR and FNR columns (i.e. if allocated =1 and Baseline_Surv = 0, then FP, if allocated = 0 and Baseline_Surv=1 then FN)
- Adjust the Table 2 and Table 3 code to conform to the new summary stats.csv
- Generate new scatter plot for Lives Saved Rate by Life Years Saved Rate

In [33]:
from scipy.stats.distributions import chi2

#DEFINE Raw Stats Calculator#
def get_raw_stats(df_, groups, alpha=0.05):
    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], #multiples each individual patient (i.e. 1 or 0) by capacity (e.g. 0.5) to get expected survival in agg.
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), #return baseline, then overwrite with 0 if allocated=1
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) #return allocated, then overwrite with 0 if baseline=1
            )          
        .groupby(groups, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        #Calculate Lives Saved and Allocation Rate
        .assign(Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv'],
                A_rate = lambda df_0: df_0['Allocated'] / df_0['Pop_N']
            )
        .assign(A_rate_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['Allocated'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N'],
                A_rate_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['Allocated']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N']
            )
        #Calculate Survival Rate (by first calculating death rate)
        .assign(Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'])
        .assign(D_rate = lambda df_1: df_1['Deaths'] / df_1['Pop_N'])
        .assign(D_rate_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['Deaths'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N'],
                D_rate_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['Deaths']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )) / df_2['Pop_N']
            )
        .assign(S_rate = lambda df_3: 1-df_3['D_rate'],
                S_rate_CI_lo = lambda df_3: 1-df_3['D_rate_CI_hi'],
                S_rate_CI_hi = lambda df_3: 1-df_3['D_rate_CI_lo'])
        #Calculate FNR, FPR and Lives Saved per Patient
        .assign(FN_rate = lambda df_0: df_0['FN']/(df_0['Pop_N']-df_0['Allocated']),
                FP_rate = lambda df_0: df_0['FP']/df_0['Allocated'],
                LS_rate = lambda df_0: df_0['Lives_Saved']/df_0['Pop_N']
            )
        #Cleanup
        .drop(['Deaths', 'D_rate', 'D_rate_CI_hi', 'D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#DEFINE Age-Adjusted Calculator#
def get_age_adjusted_stats(df_, groups, alpha=0.05):
    
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    if ('Age_Group' in groups): 
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda df_: df_['Baseline_Surv']*df_['Capacity'], #multiples each individual patient (i.e. 1 or 0) by capacity (e.g. 0.5) to get expected survival in agg.
                FN = lambda df_: df_['Baseline_Surv'].mask(df_['Allocated'] == 1, 0), #return baseline, then overwrite with 0 if allocated=1
                FP = lambda df_: df_['Allocated'].mask(df_['Baseline_Surv'] == 1, 0) #return allocated, then overwrite with 0 if baseline=1
            )
        #calculate population totals for each age group (additionally sliced by other variables, e.g. protocol, run, race)
        .groupby(groups_age, as_index=True) #originally false
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")             
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0), #zero out Std_Pop for each sub-group age-band where sub-group has no subjects (i.e. no AIAN in <25)
                Deaths = lambda df_0: df_0['Pop_N'] - df_0['Survived'],
                Lives_Saved = lambda df_0: df_0['Survived']-df_0['Exp_Surv']
            )
        #Calculate Age-Adj Deaths and Variance for each age-group
        .assign(Age_Adj_D_rate = lambda df_1: (df_1['Deaths']/df_1['Pop_N']) * df_1['Std_Pop'], #calculate the age-adjusted rate (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_D_var = lambda df_1: (df_1['Std_Pop']**2)*(df_1['Deaths']/(df_1['Pop_N']**2)), ## OLD VERSION, unclear why used.
                Age_Adj_D_var = lambda df_1: df_1['Deaths']*((df_1['Std_Pop']/df_1['Pop_N'])**2), ## SEER STAT version https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm
                #variance for each age_group to be summed for total variance of Race (see WA Health doc) https://doh.wa.gov/sites/default/files/legacy/Documents/1500//ConfIntGuide.pdf
         ## Calculate Age-Adjusted FNR and FPR for each age-group
                Age_Adj_FN_rate = lambda df_1: (df_1['FN']/(df_1['Pop_N']-df_1['Allocated'])) * df_1['Std_Pop'], #first converts LS into an LS-rate, then multiples by proportion of std pop in that group
                Age_Adj_FP_rate = lambda df_1: (df_1['FP']/df_1['Allocated']) * df_1['Std_Pop'],
        ## Calculate Age-Adjusted Lives Saved Rate for each age group
                Age_Adj_LS_rate = lambda df_1: (df_1['Lives_Saved']/df_1['Pop_N']) * df_1['Std_Pop'], #first converts LS into an LS-rate, then multiples by proportion of std pop in that group
            )
        #Assign the w variables to each Age_Group (and other groupings)
        .assign(w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N']) #calc pop weight for each Age_Group and Race (use max for Fay and Freur, and avg for Tiwari mod)
        .assign(w_max = lambda df_2: df_2.groupby(groups)['w_i'].transform('max')) #find max pop weight for Fay and Freur CIs (note use of transform, see here https://stackoverflow.com/questions/35640364/python-pandas-max-value-in-a-group-as-a-new-column
        #Collapse the age-groups to calculate total age-adjusted deaths/lives saved.
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .assign(w_max = lambda df_3: df_3['w_max']/len(df_.groupby('Age_Group').count())) #divide sum of max pop weights by number of age-groups - i.e. 8 (to re-idnetify the max pop weight for Race)
        ## Calculate Fay-Feur CIs for Age-Adjusted Death Rates
        .assign(Age_Adj_D_rate_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_D_var'])/(2*df_3['Age_Adj_D_rate']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_D_rate']**2)/df_3['Age_Adj_D_var']), #shape
                Age_Adj_D_rate_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_D_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_D_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_D_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_D_var']+df_3['w_max']**2)) #shape
            )
        #Calculate Age-Adjusted Survival Rates and CIs (as inverse of AA Death Rates and CIs)
        .assign(Age_Adj_S_rate = lambda df_4: 1-df_4['Age_Adj_D_rate'],
                Age_Adj_S_rate_CI_lo = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_hi'],
                Age_Adj_S_rate_CI_hi = lambda df_4: 1-df_4['Age_Adj_D_rate_CI_lo'],
            )
        .drop(['Survived', 'Allocated', 'FN', 'FP', 'Exp_Surv', 'Lives_Saved', 'Pop_N', 'Deaths','Std_Pop','w_i', 'w_max', 'Age_Adj_D_var', 'Age_Adj_D_rate', 'Age_Adj_D_rate_CI_hi', 'Age_Adj_D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#df_test = get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Race']).reset_index()
#df_test = (pd.concat((get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])), axis=1).reset_index())

NOTE: 
- Misallocation costs of life years
    - False Negative Life Expectancies are "years of life lost to allocation denials" (i.e. if we had a perfect allocation system we would have retained those years of life)
    - False Positive Life Expectancies are "years of life that ????" - very hard to parse...

In [38]:
from scipy.stats.distributions import chi2

#DEFINE Cormorbidity Adjusted YLL Calculator#
def get_Cho_YLS_stats(df_, groups, alpha=0.05):
    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_: df_['Cho_LE'].mask(df_['Survived'] == 1, 0), ## if survived then zero, otherwise retain life expectancy so that we sum to get YLLs.
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # if patient would have survived with ventilator then their Cho_LE x Capacity (e.g. 0.5), otherwise 0 (if deceased even with ventilator)
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), #return baseline*Cho_LE, then overwrite with 0 if allocated=1
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) #return allocated*Cho_LE, then overwrite with 0 if baseline=1
            )
        .groupby(groups, as_index=False)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"), #Total life expectancy for everyone, regardless of survival in 100% baseline
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who did survive in 100% baseline, multiplied by capacity (i.e. 0.5)
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"), #Total life expectancy (lost) of all those who did not survive (in simulation)
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who would have survived, but was not allocated (false negatives)
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #Total life expectancy for everyone who did NOT survive, but was allocated (false positives)
             )
        #Calculate Cho YLL CIs
        #.assign(YLL_Cho_CI_lo = lambda df_2: (0.5*chi2.ppf(
        #            alpha/2, #alpha
        #            2*df_2['YLL_Cho'] #shape (N.B.: if shape is zero, then result should be defined as zero)
        #            )),
        #        YLL_Cho_CI_hi = lambda df_2: (0.5*chi2.ppf(
        #            1 - alpha/2, #alpha
        #            2*(df_2['YLL_Cho']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
        #            ))
        #    )
        #Calculate Cho YLS (i.e. life years over Exp_LE that were "saved" by the protocol)
        .assign(YLS_Cho = lambda df_1: (df_1['LE_Total_Cho']-df_1['YLL_Cho'])-df_1['Exp_LE_Cho'])
        .assign(YLS_Cho_rate = lambda df_2: df_2['YLS_Cho']/df_2['Pop_N'])
        .assign(YLS_Cho_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['YLS_Cho'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )),
                YLS_Cho_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['YLS_Cho']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    ))
            )
        #Calculate FNR and FPR (i.e. how many years of life lost to errors per patient was not allocated (FN) OR patient who was allocated (FP))
        .assign(FNR_LE_Cho = lambda df_0: df_0['FN_LE_Cho']/(df_0['Pop_N']-df_0['Allocated']),
                FPR_LE_Cho = lambda df_0: df_0['FP_LE_Cho']/df_0['Allocated']
            )
        #.drop(['Pop_N','Allocated'], axis=1)
        .round(4) #round all number to two decimal places
        .set_index(groups)
    )

#DEFINE Age-Adjusted Comordity-Adjusted YLL Calculator#
def get_age_adjusted_Cho_YLS_stats(df_, groups, alpha=0.05):
    
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    if ('Age_Group' in groups): 
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda df_0: df_0['Cho_LE'].mask(df_0['Survived'] == 1, 0),  ## if survived then zero, otherwise retain life expectancy so that we sum to get YLLs.
                Exp_LE_Cho = lambda df_: df_['Baseline_Surv']*df_['Cho_LE']*df_['Capacity'], # Capacity x Cho_LE if patient would have survived with ventilator, otherwise 0 (if deceased even with ventilator)
                FN_LE_Cho = lambda df_: (df_['Baseline_Surv']*df_['Cho_LE']).mask(df_['Allocated'] == 1, 0), #return baseline*Cho_LE, then overwrite with 0 if allocated=1
                FP_LE_Cho = lambda df_: (df_['Allocated']*df_['Cho_LE']).mask(df_['Baseline_Surv'] == 1, 0) #return allocated*Cho_LE, then overwrite with 0 if baseline=1
            )
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"),
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"),
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"),
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #Total life expectancy for everyone who would have survived, but was not allocated (false negatives)
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #Total life expectancy for everyone who did NOT survive, but was allocated (false positives)
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0)) #zero out Std_Pop for each sub-group age-band where sub-group has no subjects (i.e. no AIAN in <25)
        .assign(w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N']) #calc pop weight for each Age_Group and Race (use max for Fay and Freur, and avg for Tiwari mod)
        .assign(w_max = lambda df_2: df_2.groupby(groups)['w_i'].transform('max')) #find max pop weight for Fay and Freur CIs (note use of transform, see here https://stackoverflow.com/questions/35640364/python-pandas-max-value-in-a-group-as-a-new-column
        #Calculate Age-Adjusted Cho YLL and Cho YLS
        .assign(Age_Adj_FNR_LE_Cho = lambda df_3: (df_3['FN_LE_Cho']/(df_3['Pop_N']-df_3['Allocated'])) * df_3['Std_Pop'], #first converts FN LE into an FNR of LE, then multiples by proportion of std pop in that group
                Age_Adj_FPR_LE_Cho = lambda df_3: (df_3['FP_LE_Cho']/df_3['Allocated']) * df_3['Std_Pop'],    
                #Age_Adj_YLS_Cho_rate = lambda df_3: (df_3['YLS_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLS rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_YLS_Cho_var = lambda df_3: df_3['YLS_Cho']*((df_3['Std_Pop']/df_3['Pop_N'])**2) #variance for each age_group to be summed for total variance of Race (see WA Health doc)
                Age_Adj_YLL_Cho_rate = lambda df_3: (df_3['YLL_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_YLL_Cho_var = lambda df_3: df_3['YLL_Cho']*((df_3['Std_Pop']/df_3['Pop_N'])**2), #variance for each age_group to be summed for total variance for each race (see WA Health doc)
                Age_Adj_LE_Total_Cho_rate = lambda df_3: (df_3['LE_Total_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_LE_Total_Cho_var = lambda df_3: df_3['LE_Total_Cho']*((df_3['Std_Pop']/df_3['Pop_N'])**2), #variance for each age_group to be summed for total variance for each race (see WA Health doc)
                Age_Adj_Exp_LE_Cho_rate = lambda df_3: (df_3['Exp_LE_Cho']/df_3['Pop_N']) * df_3['Std_Pop'], #calculate the crude YLL rate per age group, then multiply by std pop weight (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
                #Age_Adj_Exp_LE_Cho_var = lambda df_3: df_3['Exp_LE_Cho']*((df_3['Std_Pop']/df_3['Pop_N'])**2) #variance for each age_group to be summed for total variance for each race (see WA Health doc)
            )
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .assign(w_max = lambda df_3: df_3['w_max']/len(df_.groupby('Age_Group').count())) #was 8 #divide sum of max pop weights by number of age-groups (to re-idnetify the max pop weight for Race)
        ##Calculate Age-Adj Years of Life Saved and CIs
        .assign(Age_Adj_YLS_Cho_rate = lambda df_0: (df_0['Age_Adj_LE_Total_Cho_rate']-df_0['Age_Adj_YLL_Cho_rate'])-df_0['Age_Adj_Exp_LE_Cho_rate'], #Calculate Raw Cho-adjusted Years of Life Saved for 'groups'
                #Age_Adj_YLS_Cho_rate_CI_lo = lambda df_0: (df_0['Age_Adj_LE_Total_Cho_rate_CI_lo']-df_0['Age_Adj_YLL_Cho_rate_CI_lo'])-df_0['Age_Adj_Exp_LE_Cho_rate_CI_lo'],
                #Age_Adj_YLS_Cho_rate_CI_hi = lambda df_0: (df_0['Age_Adj_LE_Total_Cho_rate_CI_hi']-df_0['Age_Adj_YLL_Cho_rate_CI_hi'])-df_0['Age_Adj_Exp_LE_Cho_rate_CI_hi'],
            )
        .drop(['Pop_N','Allocated','LE_Total_Cho','Exp_LE_Cho', 'YLL_Cho','FN_LE_Cho', 'FP_LE_Cho', 'Std_Pop', 'w_i', 'w_max',
               'Age_Adj_LE_Total_Cho_rate', 'Age_Adj_YLL_Cho_rate',  'Age_Adj_Exp_LE_Cho_rate',  
               #'Age_Adj_LE_Total_Cho_var', 'Age_Adj_YLL_Cho_var', 'Age_Adj_Exp_LE_Cho_var',
               ], axis=1)
        .round(4) #round all numbers to 4 decimals places
        .set_index(groups)
    )

#df_test = (pd.concat((get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])), axis=1).reset_index())

'''
        ##Calculate , by calculating AA_YLL, AA_Total_LE, and AA_Exp_LE_Cho
        .assign(##Calculate CIs for Age_Adj_YLL_Cho
                Age_Adj_YLL_Cho_rate_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_YLL_Cho_var'])/(2*df_3['Age_Adj_YLL_Cho_rate']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_YLL_Cho_rate']**2)/df_3['Age_Adj_YLL_Cho_var']), #shape
                Age_Adj_YLL_Cho_rate_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_YLL_Cho_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_YLL_Cho_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_YLL_Cho_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_YLL_Cho_var']+df_3['w_max']**2)), #shape
                ##Calculate CIs for Age_Adj_Total_LE_Cho
                Age_Adj_LE_Total_Cho_rate_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_LE_Total_Cho_var'])/(2*df_3['Age_Adj_LE_Total_Cho_rate']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_LE_Total_Cho_rate']**2)/df_3['Age_Adj_LE_Total_Cho_var']), #shape
                Age_Adj_LE_Total_Cho_rate_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_LE_Total_Cho_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_LE_Total_Cho_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_LE_Total_Cho_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_LE_Total_Cho_var']+df_3['w_max']**2)), #shape
                ##Calculate CIs for Age_Adj_Exp_LE_Cho
                Age_Adj_Exp_LE_Cho_rate_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_Exp_LE_Cho_var'])/(2*df_3['Age_Adj_Exp_LE_Cho_rate']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_Exp_LE_Cho_rate']**2)/df_3['Age_Adj_Exp_LE_Cho_var']), #shape
                Age_Adj_Exp_LE_Cho_rate_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_Exp_LE_Cho_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_Exp_LE_Cho_rate']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_Exp_LE_Cho_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_Exp_LE_Cho_var']+df_3['w_max']**2)), #shape
            )
        '''

                #Calculate Fay-Feur CIs for Age-Adj, CoMorbid-Adj Years of Life Saved
                #Age_Adj_YLS_Cho_rate_CI_lo = lambda df_3: 
                #    (df_3['Age_Adj_YLS_Cho_var'])/(2*df_3['Age_Adj_YLS_Cho_rate']) *
                #    chi2.ppf(alpha/2, #alpha 
                #        (2*df_3['Age_Adj_YLS_Cho_rate']**2)/df_3['Age_Adj_YLS_Cho_var']), #shape
                #Age_Adj_YLS_Cho_rate_CI_hi = lambda df_3: 
                #    ((df_3['Age_Adj_YLS_Cho_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_YLS_Cho_rate']+df_3['w_max']))) *
                #    chi2.ppf(1-alpha/2, # alpha
                #        (2*(df_3['Age_Adj_YLS_Cho_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_YLS_Cho_var']+df_3['w_max']**2)) #shape

"\n        ##Calculate , by calculating AA_YLL, AA_Total_LE, and AA_Exp_LE_Cho\n        .assign(##Calculate CIs for Age_Adj_YLL_Cho\n                Age_Adj_YLL_Cho_rate_CI_lo = lambda df_3: \n                    (df_3['Age_Adj_YLL_Cho_var'])/(2*df_3['Age_Adj_YLL_Cho_rate']) *\n                    chi2.ppf(alpha/2, #alpha \n                        (2*df_3['Age_Adj_YLL_Cho_rate']**2)/df_3['Age_Adj_YLL_Cho_var']), #shape\n                Age_Adj_YLL_Cho_rate_CI_hi = lambda df_3: \n                    ((df_3['Age_Adj_YLL_Cho_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_YLL_Cho_rate']+df_3['w_max']))) *\n                    chi2.ppf(1-alpha/2, # alpha\n                        (2*(df_3['Age_Adj_YLL_Cho_rate']+df_3['w_max'])**2)/(df_3['Age_Adj_YLL_Cho_var']+df_3['w_max']**2)), #shape\n                ##Calculate CIs for Age_Adj_Total_LE_Cho\n                Age_Adj_LE_Total_Cho_rate_CI_lo = lambda df_3: \n                    (df_3['Age_Adj_LE_Total_Cho_var'])/(2*df_3['Age_Adj_LE_Total_Cho_rate'

In [37]:
#stats_LE_TEST =  get_raw_YLL(df_50_NY, ['Run','Protocol', 'Race']).reset_index()
#stats_LE_TEST = get_age_adjusted_YLL(df_50_NY, ['Run','Protocol', 'Race']).reset_index()
df_test =  pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol', 'Race']), get_Cho_YLS_stats(df_50_NY, ['Run','Protocol', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_NY, ['Run','Protocol', 'Race'])], axis=1).reset_index()

#Generators

In [39]:
##################
# Survival Rates #
##################
stats_overall_50 = pd.concat([
    get_raw_stats(df_baseline, ['Protocol', 'Run']),
    get_raw_stats(df_50_Lott, ['Protocol', 'Run']),
    get_raw_stats(df_50_Age, ['Protocol', 'Run']),
    get_raw_stats(df_50_sofa, ['Protocol', 'Run']),
    get_raw_stats(df_50_NY, ['Protocol', 'Run']),
    get_raw_stats(df_50_Colorado, ['Protocol', 'Run']),
    get_raw_stats(df_50_Bhavani, ['Protocol', 'Run'])
]).reset_index()

stats_race_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_baseline, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_Lott, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_Age, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_NY, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_Colorado, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Protocol', 'Run', 'Race']), get_age_adjusted_stats(df_50_Bhavani, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
])

stats_age_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_baseline, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_Lott, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_Age, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_sofa, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_NY, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_Colorado, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_stats(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index()
])

stats_COVID_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_baseline, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_Lott, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_Age, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_sofa, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_sofa, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_NY, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_stats(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index()
])

'''
##########################
# YLL Sheets #
##########################
stats_YLL_overall_50 = pd.concat([
    get_raw_YLL(df_baseline, ['Protocol', 'Run']),
    get_raw_YLL(df_50_Lott, ['Protocol', 'Run']),
    get_raw_YLL(df_50_Age, ['Protocol', 'Run']),
    get_raw_YLL(df_50_sofa, ['Protocol', 'Run']),
    get_raw_YLL(df_50_NY, ['Protocol', 'Run']), 
    get_raw_YLL(df_50_Colorado, ['Protocol', 'Run']),
    get_raw_YLL(df_50_Bhavani, ['Protocol', 'Run'])
]).reset_index()

stats_YLL_race_50 = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_baseline, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Lott, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_Lott, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Age, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_Age, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_sofa, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_NY, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_NY, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Colorado, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_Colorado, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Bhavani, ['Protocol', 'Run', 'Race']), get_age_adjusted_YLL(df_50_Bhavani, ['Protocol', 'Run', 'Race'])], axis=1).reset_index()
])

stats_YLL_age_50 = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_baseline, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Lott, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_Lott, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Age, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_Age, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_sofa, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_sofa, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_NY, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_NY, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Colorado, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_Colorado, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_YLL(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index()
])

stats_YLL_COVID_50 = pd.concat([
    pd.concat([get_raw_YLL(df_baseline, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_baseline, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Lott, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_Lott, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Age, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_Age, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_sofa, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_sofa, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_NY, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_NY, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_YLL(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_YLL(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index()
])
'''
##########################
# Cho YLL Sheets #
##########################
stats_Cho_overall_50 = pd.concat([
    get_Cho_YLS_stats(df_baseline, ['Protocol', 'Run']),  
    get_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run']), 
    get_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run']),
    get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run']),
    get_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run']),
    get_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run']),
    get_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run'])
]).reset_index()

stats_Cho_race_50 = pd.concat([
    pd.concat([get_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'Race'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'Race']), get_age_adjusted_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'Race'])], axis=1).reset_index()
])

stats_Cho_age_50 = pd.concat([
    pd.concat([get_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group']), get_age_adjusted_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'Age_Group'])], axis=1).reset_index()
])

stats_Cho_COVID_50 = pd.concat([
    pd.concat([get_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_baseline, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_Lott, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_Age, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_sofa, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_NY, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_Colorado, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status']), get_age_adjusted_Cho_YLS_stats(df_50_Bhavani, ['Protocol', 'Run', 'COVID_Status'])], axis=1).reset_index()
])

###Use below to convert Stats to excel sheets####

with pd.ExcelWriter("MC-50-results-stats.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    stats_overall_50.to_excel(writer, sheet_name="Overall", index=False)
    stats_race_50.to_excel(writer, sheet_name="Race", index=False)
    stats_age_50.to_excel(writer, sheet_name="Age Group", index=False)
    stats_COVID_50.to_excel(writer, sheet_name="COVID Status", index=False)
    #stats_YLL_overall_50.to_excel(writer, sheet_name="YLL_Overall", index=False)
    #stats_YLL_race_50.to_excel(writer, sheet_name="YLL_Race", index=False)
    #stats_YLL_age_50.to_excel(writer, sheet_name="YLL_Age_Group", index=False)
    #stats_YLL_COVID_50.to_excel(writer, sheet_name="YLL_COVID_Status", index=False)
    stats_Cho_overall_50.to_excel(writer, sheet_name="Cho_Overall", index=False)
    stats_Cho_race_50.to_excel(writer, sheet_name="Cho_Race", index=False)
    stats_Cho_age_50.to_excel(writer, sheet_name="Cho_Age_Group", index=False)
    stats_Cho_COVID_50.to_excel(writer, sheet_name="Cho_COVID_Status", index=False)

# AGGREGATION OF RUNS and Summary Statistics

In [2]:
#####
#Extract processed results from MC-50-results-stats
#####

stats_overall_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name='Overall')
stats_race_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name='Race')
stats_age_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name='Age Group')
stats_COVID_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name='COVID Status')
#stats_YLL_overall_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="YLL_Overall")
#stats_YLL_race_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="YLL_Race")
#stats_YLL_age_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="YLL_Age_Group")
#stats_YLL_COVID_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="YLL_COVID_Status")
stats_Cho_overall_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="Cho_Overall")
stats_Cho_race_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="Cho_Race")
stats_Cho_age_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="Cho_Age_Group")
stats_Cho_COVID_50 = pd.read_excel('MC-50-results-stats.xlsx', sheet_name="Cho_COVID_Status")

In [8]:
#Summary Stats Generator Capacity Levels (Change "Query")

print('-'*30, 'All Protocols - LS Rate')
stats_n_50 = (stats_overall_50
    .groupby(['Protocol'])['LS_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
    )
    .drop(['std','sem'], axis=1)
    .round(4)
    .mul(1000) # remove to do per patient, rather than per 1000 patients   
)
print(stats_n_50)
print('-'*30)

print('-'*30, 'All Protocols - YLS Rate')
stats_Cho_n_50 = (stats_Cho_overall_50
    .groupby(['Protocol'])['YLS_Cho_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
    )
    .drop(['std','sem'], axis=1)
    .round(4)
    .mul(1000).round(0) # remove to do per patient, rather than per 1000 patients   
)
print(stats_Cho_n_50)
print('-'*30)

------------------------------ All Protocols - LS Rate
           mean  ci95_lo  ci95_hi
Protocol                         
Age        28.7     28.6     28.9
Baseline    0.0      NaN      NaN
Bhavani    18.0     17.8     18.2
Colorado   14.7     14.5     14.9
Lottery     0.1     -0.1      0.3
NY SOFA    13.2     13.0     13.4
Pure SOFA  16.7     16.5     16.9
------------------------------
------------------------------ All Protocols - YLS Rate
             mean  ci95_lo  ci95_hi
Protocol                           
Age        3408.0   3402.0   3413.0
Baseline      0.0      NaN      NaN
Bhavani    1454.0   1447.0   1461.0
Colorado    999.0    992.0   1007.0
Lottery       3.0     -6.0     12.0
NY SOFA     416.0    407.0    424.0
Pure SOFA   839.0    831.0    846.0
------------------------------


In [10]:
print('-'*30, 'All Protocols - LS Rate')
stats_race_n_50 = (stats_race_50
    .groupby(['Protocol', 'Race'])['LS_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
    )
    .drop(['std','sem'], axis=1)
    .round(4)
    .mul(1000) # remove to do per patient, rather than per 1000 patients   
)
print(stats_race_n_50)
print('-'*30)

print('-'*30, 'All Protocols - YLS Rate')
stats_Cho_race_n_50 = (stats_Cho_race_50
    .groupby(['Protocol', 'Race'])['YLS_Cho_rate'].agg(['mean', 'std', 'sem'])
    .assign(ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem'],
            ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
    )
    .drop(['std','sem'], axis=1)
    .round(4)
    .mul(1000).round(0) # remove to do per patient, rather than per 1000 patients   
)
print(stats_Cho_race_n_50)
print('-'*30)

------------------------------ All Protocols - LS Rate
                     mean  ci95_lo  ci95_hi
Protocol  Race                             
Age       >1 Race   207.8    203.0    212.5
          AAPI       39.5     36.5     42.6
          AIAN      342.3    329.5    355.2
          Black      99.9     99.0    100.7
          Hispanic  143.4    141.7    145.1
          Unknown    56.8     55.1     58.5
          White       4.3      4.0      4.6
Baseline  >1 Race     0.0      NaN      NaN
          AAPI        0.0      NaN      NaN
          AIAN        0.0      NaN      NaN
          Black       0.0      NaN      NaN
          Hispanic    0.0      NaN      NaN
          Unknown     0.0      NaN      NaN
          White       0.0      NaN      NaN
Bhavani   >1 Race    89.8     85.2     94.3
          AAPI       -1.3     -4.3      1.7
          AIAN     -336.2   -349.2   -323.2
          Black      53.3     52.5     54.2
          Hispanic   69.7     67.9     71.4
          Unknown    

## Table 2 - Overall Survival Rate, Allocation by Race, and Age-Adjusted Survival by Race

In [44]:
#Alternative way of deriving CIs over the parameters derived in the runs - i.e. traditional CIs across the rates in each run.

def get_CIs_rates(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                Survived=pd.NamedAgg(column="Survived", aggfunc="mean"),
                Allocated=pd.NamedAgg(column="Allocated", aggfunc="mean"),
                A_rate=pd.NamedAgg(column="A_rate", aggfunc="mean"),
                A_std=pd.NamedAgg(column="A_rate", aggfunc="std"),
                A_sem=pd.NamedAgg(column="A_rate", aggfunc="sem"),
                S_rate=pd.NamedAgg(column="S_rate", aggfunc="mean"),
                S_std=pd.NamedAgg(column="S_rate", aggfunc="std"),
                S_sem=pd.NamedAgg(column="S_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(A_rate_CI_lo = lambda df_: df_['A_rate'] - 1.96* df_['A_sem'],
                A_rate_CI_hi = lambda df_:df_['A_rate'] + 1.96* df_['A_sem'],
                S_rate_CI_lo = lambda df_: df_['S_rate'] - 1.96* df_['S_sem'],
                S_rate_CI_hi = lambda df_:df_['S_rate'] + 1.96* df_['S_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N','Survived','Allocated','A_rate','A_rate_CI_lo','A_rate_CI_hi', 'S_rate', 'S_rate_CI_lo', 'S_rate_CI_hi'])
            .reset_index()
    )

#            .drop(df.loc[df_['Protocol']=='Baseline'].index, inplace=True)

def get_CIs_rates_with_AA(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                Survived=pd.NamedAgg(column="Survived", aggfunc="mean"),
                Allocated=pd.NamedAgg(column="Allocated", aggfunc="mean"),
                A_rate=pd.NamedAgg(column="A_rate", aggfunc="mean"),
                A_std=pd.NamedAgg(column="A_rate", aggfunc="std"),
                A_sem=pd.NamedAgg(column="A_rate", aggfunc="sem"),
                S_rate=pd.NamedAgg(column="S_rate", aggfunc="mean"),
                S_std=pd.NamedAgg(column="S_rate", aggfunc="std"),
                S_sem=pd.NamedAgg(column="S_rate", aggfunc="sem"),
                Age_Adj_S_rate=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="mean"),
                Age_Adj_S_std=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="std"),
                Age_Adj_S_sem=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(A_rate_CI_lo = lambda df_: df_['A_rate'] - 1.96* df_['A_sem'],
                A_rate_CI_hi = lambda df_:df_['A_rate'] + 1.96* df_['A_sem'],
                S_rate_CI_lo = lambda df_: df_['S_rate'] - 1.96* df_['S_sem'],
                S_rate_CI_hi = lambda df_:df_['S_rate'] + 1.96* df_['S_sem'],
                Age_Adj_S_rate_CI_lo = lambda df_: df_['Age_Adj_S_rate'] - 1.96* df_['Age_Adj_S_sem'],
                Age_Adj_S_rate_CI_hi = lambda df_:df_['Age_Adj_S_rate'] + 1.96* df_['Age_Adj_S_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N','Survived','Allocated','A_rate','A_rate_CI_lo','A_rate_CI_hi', 'S_rate', 'S_rate_CI_lo',
                               'S_rate_CI_hi', 'Age_Adj_S_rate','Age_Adj_S_rate_CI_lo','Age_Adj_S_rate_CI_hi'])
            .reset_index()
    )

def get_CIs_Cho(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                LE_Total_Cho=pd.NamedAgg(column="LE_Total_Cho", aggfunc="mean"),
                YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="mean"),
                YLL_Cho_std=pd.NamedAgg(column="YLL_Cho", aggfunc="std"),
                YLL_Cho_sem=pd.NamedAgg(column="YLL_Cho", aggfunc="sem"),
            )
            .reset_index()
            .assign(YLL_Cho_CI_lo = lambda df_: df_['YLL_Cho'] - 1.96* df_['YLL_Cho_sem'],
                YLL_Cho_CI_hi = lambda df_:df_['YLL_Cho'] + 1.96* df_['YLL_Cho_sem']
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N', 'LE_Total_Cho', 'YLL_Cho','YLL_Cho_CI_lo','YLL_Cho_CI_hi'])
            .reset_index()
    )

##Currently have remove Age-Adjustment since it is hard to interpret in this context.
def get_CIs_Cho_with_AA(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                LE_Total_Cho=pd.NamedAgg(column="LE_Total_Cho", aggfunc="mean"),
                YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="mean"),
                YLL_Cho_std=pd.NamedAgg(column="YLL_Cho", aggfunc="std"),
                YLL_Cho_sem=pd.NamedAgg(column="YLL_Cho", aggfunc="sem"),
                #Age_Adj_YLL_Cho_rate=pd.NamedAgg(column="Age_Adj_YLL_Cho_rate", aggfunc="mean"),
                #Age_Adj_YLL_Cho_rate_std=pd.NamedAgg(column="Age_Adj_YLL_Cho_rate", aggfunc="std"),
                #Age_Adj_YLL_Cho_rate_sem=pd.NamedAgg(column="Age_Adj_YLL_Cho_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(YLL_Cho_CI_lo = lambda df_: df_['YLL_Cho'] - 1.96* df_['YLL_Cho_sem'],
                YLL_Cho_CI_hi = lambda df_:df_['YLL_Cho'] + 1.96* df_['YLL_Cho_sem'],
                #Age_Adj_YLL_Cho_rate_CI_lo = lambda df_: df_['Age_Adj_YLL_Cho_rate'] - 1.96* df_['Age_Adj_YLL_Cho_rate_sem'],
                #Age_Adj_YLL_Cho_rate_CI_hi = lambda df_:df_['Age_Adj_YLL_Cho_rate'] + 1.96* df_['Age_Adj_YLL_Cho_rate_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N', 'LE_Total_Cho', 'YLL_Cho', 'YLL_Cho_CI_lo', 'YLL_Cho_CI_hi', 'Age_Adj_YLL_Cho_rate','Age_Adj_YLL_Cho_rate_CI_lo','Age_Adj_YLL_Cho_rate_CI_hi'])
            .reset_index()
    )

#stats_overall_50_mean = pd.concat([stats_overall_50[(stats_overall_50['Protocol']=='Baseline')], get_CIs_rates(stats_overall_50[stats_overall_50.Protocol != 'Baseline'], ['Protocol'])])
#stats_race_50_mean = pd.concat([stats_race_50[(stats_race_50['Protocol']=='Baseline')], get_CIs_rates_with_AA(stats_race_50[stats_race_50.Protocol != 'Baseline'], ['Protocol', 'Race'])])

with pd.ExcelWriter("MC-50-Table_2_Allocation_Survival.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    # 
    pd.concat([stats_overall_50[(stats_overall_50['Protocol']=='Baseline')], 
               get_CIs_rates(stats_overall_50[stats_overall_50.Protocol != 'Baseline'], ['Protocol'])]).to_excel(writer, sheet_name="Overall", index=False)
    pd.concat([stats_race_50[(stats_race_50['Protocol']=='Baseline')], 
               get_CIs_rates_with_AA(stats_race_50[stats_race_50.Protocol != 'Baseline'], ['Protocol', 'Race'])]).to_excel(writer, sheet_name="Race", index=False)
    pd.concat([stats_age_50[(stats_age_50['Protocol']=='Baseline')], 
               get_CIs_rates_with_AA(stats_age_50[stats_age_50.Protocol != 'Baseline'], ['Protocol', 'Age_Group'])]).to_excel(writer, sheet_name="Age_Group", index=False)
    pd.concat([stats_COVID_50[(stats_COVID_50['Protocol']=='Baseline')], 
               get_CIs_rates_with_AA(stats_COVID_50[stats_COVID_50.Protocol != 'Baseline'], ['Protocol', 'COVID_Status'])]).to_excel(writer, sheet_name="COVID_Status", index=False)
    
    pd.concat([stats_Cho_overall_50[(stats_Cho_overall_50['Protocol']=='Baseline')], 
               get_CIs_Cho(stats_Cho_overall_50[stats_Cho_overall_50.Protocol != 'Baseline'], ['Protocol'])]).to_excel(writer, sheet_name="Cho_Overall", index=False)
    pd.concat([stats_Cho_race_50[(stats_Cho_race_50['Protocol']=='Baseline')], 
               get_CIs_Cho_with_AA(stats_Cho_race_50[stats_Cho_race_50.Protocol != 'Baseline'], ['Protocol', 'Race'])]).to_excel(writer, sheet_name="Cho_Race", index=False)
    pd.concat([stats_Cho_age_50[(stats_Cho_age_50['Protocol']=='Baseline')], 
               get_CIs_Cho_with_AA(stats_Cho_age_50[stats_Cho_age_50.Protocol != 'Baseline'], ['Protocol', 'Age_Group'])]).to_excel(writer, sheet_name="Cho_Age_Group", index=False)
    pd.concat([stats_Cho_COVID_50[(stats_Cho_COVID_50['Protocol']=='Baseline')], 
               get_CIs_Cho_with_AA(stats_Cho_COVID_50[stats_Cho_COVID_50.Protocol != 'Baseline'], ['Protocol', 'COVID_Status'])]).to_excel(writer, sheet_name="Cho_COVID_Status", index=False)

## Table 3 - Lives Saved and Years of Life Saved

In [5]:
#Alternative way of deriving CIs over the parameters derived in the runs - i.e. traditional CIs across the rates in each run.

def get_LS(df_, groups):
    return (df_
#            .assign(Lives_Saved = lambda df_1: df_1['Survived']-df_1['Exp_Surv'])
#            .assign(LS_rate = lambda df_2: df_2['Lives_Saved']/df_2['Pop_N'],)
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                Allocated=pd.NamedAgg(column="Allocated", aggfunc="mean"),
                Survived=pd.NamedAgg(column="Survived", aggfunc="mean"),
                Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="mean"),
                Lives_Saved=pd.NamedAgg(column="Lives_Saved", aggfunc="mean"),
                LS_std=pd.NamedAgg(column="Lives_Saved", aggfunc="std"),
                LS_sem=pd.NamedAgg(column="Lives_Saved", aggfunc="sem"),
                LS_rate=pd.NamedAgg(column="LS_rate", aggfunc="mean"),
                LS_rate_std=pd.NamedAgg(column="LS_rate", aggfunc="std"),
                LS_rate_sem=pd.NamedAgg(column="LS_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(
                LS_CI_lo = lambda df_3: df_3['Lives_Saved'] - 1.96* df_3['LS_sem'],
                LS_CI_hi = lambda df_3:df_3['Lives_Saved'] + 1.96* df_3['LS_sem'],
                LS_rate_CI_lo = lambda df_3: df_3['LS_rate'] - 1.96* df_3['LS_rate_sem'],
                LS_rate_CI_hi = lambda df_3:df_3['LS_rate'] + 1.96* df_3['LS_rate_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N','Allocated','Survived', 'Exp_Surv', 'Lives_Saved', 'LS_CI_lo', 'LS_CI_hi', 'LS_rate', 'LS_rate_CI_lo', 'LS_rate_CI_hi'])
            .reset_index()
    )

def get_YLS_Cho(df_, groups):
    return (df_
            #.assign(YLS_Cho = lambda df_1: (df_1['LE_Total_Cho']-df_1['Cho_YLL'])-df_1['Exp_LE_Cho'])
            #.assign(YLS_Cho_rate = lambda df_2: df_2['YLS_Cho']/df_2['Pop_N'],)
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                LE_Total_Cho=pd.NamedAgg(column="LE_Total_Cho", aggfunc="mean"),
                Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="mean"),
                YLS_Cho=pd.NamedAgg(column="YLS_Cho", aggfunc="mean"),
                YLS_Cho_std=pd.NamedAgg(column="YLS_Cho", aggfunc="std"),
                YLS_Cho_sem=pd.NamedAgg(column="YLS_Cho", aggfunc="sem"),
                YLS_Cho_rate=pd.NamedAgg(column="YLS_Cho_rate", aggfunc="mean"),
                YLS_Cho_rate_std=pd.NamedAgg(column="YLS_Cho_rate", aggfunc="std"),
                YLS_Cho_rate_sem=pd.NamedAgg(column="YLS_Cho_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(YLS_Cho_CI_lo = lambda df_3: df_3['YLS_Cho'] - 1.96* df_3['YLS_Cho_sem'],
                YLS_Cho_CI_hi = lambda df_3:df_3['YLS_Cho'] + 1.96* df_3['YLS_Cho_sem'],
                YLS_Cho_rate_CI_lo = lambda df_3: df_3['YLS_Cho_rate'] - 1.96* df_3['YLS_Cho_rate_sem'],
                YLS_Cho_rate_CI_hi = lambda df_3:df_3['YLS_Cho_rate'] + 1.96* df_3['YLS_Cho_rate_sem']
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N', 'LE_Total_Cho', 'Exp_LE_Cho', 'YLS_Cho','YLS_Cho_CI_lo','YLS_Cho_CI_hi', 'YLS_Cho_rate','YLS_Cho_rate_CI_lo','YLS_Cho_rate_CI_hi'])
            .reset_index()
    )


'''
def get_LS_with_AA(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                Survived=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                Allocated=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                S_rate=pd.NamedAgg(column="S_rate", aggfunc="mean"),
                S_std=pd.NamedAgg(column="S_rate", aggfunc="std"),
                S_sem=pd.NamedAgg(column="S_rate", aggfunc="sem"),
                Age_Adj_S_rate=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="mean"),
                Age_Adj_S_std=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="std"),
                Age_Adj_S_sem=pd.NamedAgg(column="Age_Adj_S_rate", aggfunc="sem"),
            )
            .reset_index()
            .assign(S_rate_CI_lo = lambda df_: df_['S_rate'] - 1.96* df_['S_sem'],
                S_rate_CI_hi = lambda df_:df_['S_rate'] + 1.96* df_['S_sem'],
                Age_Adj_S_rate_CI_lo = lambda df_: df_['Age_Adj_S_rate'] - 1.96* df_['Age_Adj_S_sem'],
                Age_Adj_S_rate_CI_hi = lambda df_:df_['Age_Adj_S_rate'] + 1.96* df_['Age_Adj_S_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N','Survived','Allocated','A_rate','A_rate_CI_lo','A_rate_CI_hi', 'S_rate', 'S_rate_CI_lo',
                               'S_rate_CI_hi', 'Age_Adj_S_rate','Age_Adj_S_rate_CI_lo','Age_Adj_S_rate_CI_hi'])
            .reset_index()
    )


def get_YLS_Cho_with_AA(df_, groups):
    return (df_
            .groupby(groups, as_index=True)
            .agg(
                Run=pd.NamedAgg(column="Run", aggfunc="count"),
                Pop_N=pd.NamedAgg(column="Pop_N", aggfunc="mean"),
                LE_Total_Cho=pd.NamedAgg(column="LE_Total_Cho", aggfunc="mean"),
                Cho_YLL=pd.NamedAgg(column="Cho_YLL", aggfunc="mean"),
                Cho_YLL_std=pd.NamedAgg(column="Cho_YLL", aggfunc="std"),
                Cho_YLL_sem=pd.NamedAgg(column="Cho_YLL", aggfunc="sem"),
                Age_Adj_Cho_YLL=pd.NamedAgg(column="Age_Adj_Cho_YLL", aggfunc="mean"),
                Age_Adj_Cho_YLL_std=pd.NamedAgg(column="Age_Adj_Cho_YLL", aggfunc="std"),
                Age_Adj_Cho_YLL_sem=pd.NamedAgg(column="Age_Adj_Cho_YLL", aggfunc="sem"),
            )
            .reset_index()
            .assign(Cho_YLL_CI_lo = lambda df_: df_['Cho_YLL'] - 1.96* df_['Cho_YLL_sem'],
                Cho_YLL_CI_hi = lambda df_:df_['Cho_YLL'] + 1.96* df_['Cho_YLL_sem'],
                Age_Adj_Cho_YLL_CI_lo = lambda df_: df_['Age_Adj_Cho_YLL'] - 1.96* df_['Age_Adj_Cho_YLL_sem'],
                Age_Adj_Cho_YLL_CI_hi = lambda df_:df_['Age_Adj_Cho_YLL'] + 1.96* df_['Age_Adj_Cho_YLL_sem'],
            )
            .round(4) #round all number to two decimal places
            .set_index(groups)
            .reindex(columns=['Run','Pop_N', 'LE_Total_Cho', 'Cho_YLL', 'Cho_YLL_CI_lo', 'Cho_YLL_CI_hi', 'Age_Adj_Cho_YLL','Age_Adj_Cho_YLL_CI_lo','Age_Adj_Cho_YLL_CI_hi'])
            .reset_index()
    )
'''
    

#stats_overall_50_mean = pd.concat([stats_overall_50[(stats_overall_50['Protocol']=='Baseline')], get_LS(stats_overall_50[stats_overall_50.Protocol != 'Baseline'], ['Protocol'])])
#stats_race_50_mean = pd.concat([stats_race_50[(stats_race_50['Protocol']=='Baseline')], get_LS_with_AA(stats_race_50[stats_race_50.Protocol != 'Baseline'], ['Protocol', 'Race'])])

with pd.ExcelWriter("MC-50-Table_3_LS_and_YLS.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    get_LS(stats_overall_50[stats_overall_50.Protocol != 'Baseline'], ['Protocol']).to_excel(writer, sheet_name="Overall", index=False)
    get_LS(stats_race_50[stats_race_50.Protocol != 'Baseline'], ['Protocol', 'Race']).to_excel(writer, sheet_name="Race", index=False)
    get_LS(stats_age_50[stats_age_50.Protocol != 'Baseline'], ['Protocol', 'Age_Group']).to_excel(writer, sheet_name="Age_Group", index=False)
    get_LS(stats_COVID_50[stats_COVID_50.Protocol != 'Baseline'], ['Protocol', 'COVID_Status']).to_excel(writer, sheet_name="COVID_Status", index=False)
        
    get_YLS_Cho(stats_Cho_overall_50[stats_Cho_overall_50.Protocol != 'Baseline'], ['Protocol']).to_excel(writer, sheet_name="Cho_Overall", index=False)
    get_YLS_Cho(stats_Cho_race_50[stats_Cho_race_50.Protocol != 'Baseline'], ['Protocol', 'Race']).to_excel(writer, sheet_name="Cho_Race", index=False)
    get_YLS_Cho(stats_Cho_age_50[stats_Cho_age_50.Protocol != 'Baseline'], ['Protocol', 'Age_Group']).to_excel(writer, sheet_name="Cho_Age_Group", index=False)
    get_YLS_Cho(stats_Cho_COVID_50[stats_Cho_COVID_50.Protocol != 'Baseline'], ['Protocol', 'COVID_Status']).to_excel(writer, sheet_name="Cho_COVID_Status", index=False)

T-Tests for significance of differences, White vs Hispanic, or White vs Black \
https://www.statology.org/pandas-t-test/ \

ttest_ind(white_group['survived'], other_group['survived']) 

In [49]:
from scipy.stats import ttest_ind

## OVERALL PERFORMANCE T_TEST - Lotter vs Other Protocol

## RACIAL DISPARITIES T_TEST - White vs Other Race (within Protocol)
print('-'*30)
print('Baseline - Black', ttest_ind(stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='White')]['S_rate'], stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Black')]['S_rate'], equal_var=False))
print('Baseline - Hispanic', ttest_ind(stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='White')]['S_rate'], stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Hispanic')]['S_rate'], equal_var=False))
print('-'*30)

## RACIAL PERFORMANCE T_TEST - Lottery vs Other Protocol (within Race)

'''
base_b = stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Black')]
base_h = stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Hispanic')]
base_w = stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='White')]
base_avg = stats_overall_50[(stats_overall_50['Protocol']=='Baseline')]


print('Baseline - Black', ttest_ind(base_w['S_rate'], base_b['S_rate'], equal_var=False))
print('Baseline - Hispanic', ttest_ind(base_w['S_rate'], base_h['S_rate'], equal_var=False))
print('-'*30)

NY_50_b = stats_race_50[(stats_race_50['Protocol']=='NY SOFA') & (stats_race_50['Race']=='Black')]
NY_50_h = stats_race_50[(stats_race_50['Protocol']=='NY SOFA') & (stats_race_50['Race']=='Hispanic')]
NY_50_w = stats_race_50[(stats_race_50['Protocol']=='NY SOFA') & (stats_race_50['Race']=='White')]
NY_50_avg = stats_overall_50[(stats_overall_50['Protocol']=='NY SOFA')]

print('NY - Black', ttest_ind(NY_50_w['S_rate'], NY_50_b['S_rate'], equal_var=False))
print('NY - Hispanic', ttest_ind(NY_50_w['S_rate'], NY_50_h['S_rate'], equal_var=False))
print('-'*30)

Age_50_b = stats_race_50[(stats_race_50['Protocol']=='Age') & (stats_race_50['Race']=='Black')]
Age_50_h = stats_race_50[(stats_race_50['Protocol']=='Age') & (stats_race_50['Race']=='Hispanic')]
Age_50_w = stats_race_50[(stats_race_50['Protocol']=='Age') & (stats_race_50['Race']=='White')]
Age_50_avg = stats_overall_50[(stats_overall_50['Protocol']=='Age')]

print('Age - Black', ttest_ind(Age_50_w['S_rate'], Age_50_b['S_rate'], equal_var=False))
print('Age - Hispanic', ttest_ind(Age_50_w['S_rate'], Age_50_h['S_rate'], equal_var=False))
print('-'*30)

Lott_50_b = stats_race_50[(stats_race_50['Protocol']=='Lottery') & (stats_race_50['Race']=='Black')]
Lott_50_h = stats_race_50[(stats_race_50['Protocol']=='Lottery') & (stats_race_50['Race']=='Hispanic')]
Lott_50_w = stats_race_50[(stats_race_50['Protocol']=='Lottery') & (stats_race_50['Race']=='White')]
Lott_50_avg = stats_overall_50[(stats_overall_50['Protocol']=='Lottery')]

print('Lottery - Black', ttest_ind(Lott_50_w['S_rate'], Lott_50_b['S_rate'], equal_var=False))
print('Lottery - Hispanic', ttest_ind(Lott_50_w['S_rate'], Lott_50_h['S_rate'], equal_var=False))
print('-'*30)

'''
'''
#COVID T-Tests
base_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Baseline') & (stats_overall_50['COVID_Status']==0)]
base_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Baseline') & (stats_overall_50['COVID_Status']==1)]
NY_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='NY SOFA') & (stats_overall_50['COVID_Status']==0)]
NY_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='NY SOFA') & (stats_overall_50['COVID_Status']==1)]
Age_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Age') & (stats_overall_50['COVID_Status']==0)]
Age_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Age') & (stats_overall_50['COVID_Status']==1)]
Lott_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Lottery') & (stats_overall_50['COVID_Status']==0)]
Lott_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Lottery') & (stats_overall_50['COVID_Status']==1)]

print('Baseline - Neg', ttest_ind(base_avg['S_rate'], base_c0['S_rate'], equal_var=False))
print('Baseline - Pos', ttest_ind(base_avg['S_rate'], base_c1['S_rate'], equal_var=False))
print('NY SOFA - Neg', ttest_ind(NY_50_avg['S_rate'], NY_50_c0['S_rate'], equal_var=False))
print('NY SOFA - Pos', ttest_ind(NY_50_avg['S_rate'], NY_50_c1['S_rate'], equal_var=False))
print('Age - Neg', ttest_ind(Age_50_avg['S_rate'], Age_50_c0['S_rate'], equal_var=False))
print('Age - Pos', ttest_ind(Age_50_avg['S_rate'], Age_50_c1['S_rate'], equal_var=False))
print('Lottery - Neg', ttest_ind(Lott_50_avg['S_rate'], Lott_50_c0['S_rate'], equal_var=False))
print('Lottery - Pos', ttest_ind(Lott_50_avg['S_rate'], Lott_50_c1['S_rate'], equal_var=False))
'''


------------------------------
Baseline - Black Ttest_indResult(statistic=nan, pvalue=nan)
Baseline - Hispanic Ttest_indResult(statistic=nan, pvalue=nan)
------------------------------


  print('Baseline - Black', ttest_ind(stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='White')]['S_rate'], stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Black')]['S_rate'], equal_var=False))
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  print('Baseline - Hispanic', ttest_ind(stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='White')]['S_rate'], stats_race_50[(stats_race_50['Protocol']=='Baseline') & (stats_race_50['Race']=='Hispanic')]['S_rate'], equal_var=False))


"\n#COVID T-Tests\nbase_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Baseline') & (stats_overall_50['COVID_Status']==0)]\nbase_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Baseline') & (stats_overall_50['COVID_Status']==1)]\nNY_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='NY SOFA') & (stats_overall_50['COVID_Status']==0)]\nNY_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='NY SOFA') & (stats_overall_50['COVID_Status']==1)]\nAge_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Age') & (stats_overall_50['COVID_Status']==0)]\nAge_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Age') & (stats_overall_50['COVID_Status']==1)]\nLott_50_c0 = stats_overall_50[(stats_overall_50['Protocol']=='Lottery') & (stats_overall_50['COVID_Status']==0)]\nLott_50_c1 = stats_overall_50[(stats_overall_50['Protocol']=='Lottery') & (stats_overall_50['COVID_Status']==1)]\n\nprint('Baseline - Neg', ttest_ind(base_avg['S_rate'], base_c0['S_rate'], equal_var=Fa

# OFFCUTS and OLD CODE

#### Raw Years of Life Lost Calculators
Calculates years of life lost by taking the sum of the (i) LE and (ii) co-morbidity adjusted LE, for all those who are deceased (i.e. survived=0)
*TO DO* = (1) Aggregate LE only on Survived=0, but Pop_N should count all within Age-Group

In [None]:
#### 
# WARNING THIS CALCULATOR HAS NOT BEEN HARMONIZED TO THE STANDARD CALC/ STRCUTURE USED FOR Lives Saved and Years of Life Saved (CHO)
####


from scipy.stats.distributions import chi2

#DEFINE Raw LE Calculator#
def get_raw_YLL(df_, groups, alpha=0.05):
    return (df_
        .fillna(0)
        .assign(YLL = lambda df_1: df_1['LE'].where(df_1['Survived'] == 0, 0), ## if deceased, then retain life expectancy, otherwise 0 out (so that we sum to get YLLs).
            Exp_LE = lambda df_: df_['Baseline_Surv']*df_['LE']*df_['Capacity']# Capacity x LE if patient would have survived with ventilator, otherwise 0 (if deceased even with ventilator)
            ) 
        .groupby(groups, as_index=False)
        .agg(Pop_N=pd.NamedAgg(column="LE", aggfunc="count"),
             LE_Total=pd.NamedAgg(column="LE", aggfunc="sum"),
             Exp_LE=pd.NamedAgg(column="Exp_LE", aggfunc="sum"),
             YLL=pd.NamedAgg(column="YLL", aggfunc="sum"))
        .assign(YLL_CI_lo = lambda df_2: (0.5*chi2.ppf(
                    alpha/2, #alpha
                    2*df_2['YLL'] #shape (N.B.: if shape is zero, then result should be defined as zero)
                    )),
                YLL_CI_hi = lambda df_2: (0.5*chi2.ppf(
                    1 - alpha/2, #alpha
                    2*(df_2['YLL']+1) #shape (N.B.: if shape is zero, then result should be defined as zero)
                    ))
            )
        .round(2) #round all number to two decimal places
        .set_index(groups)
    )

#DEFINE Age-Adjusted Raw LE Calculator#
def get_age_adjusted_YLL(df_, groups, alpha=0.05):
    
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    if ('Age_Group' in groups): 
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(YLL = lambda df_0: df_0['LE'].where(df_0['Survived'] == 0, 0), ## if deceased, then retain life expectancy, otherwise 0 out (so that we sum to get YLLs).
            Baseline_LE = lambda df_: df_['Baseline_Surv']*df_['LE'],# LE if patient would have survived with ventilator, otherwise 0 (if deceased even with ventilator)
            Baseline_Cho_LE = lambda df_: df_['Baseline_Surv']*df_['Cho_LE'] # Cho_LE if patient would have survived with ventilator, otherwise 0 (if deceased even with ventilator)
            )
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="LE", aggfunc="count"),
             LE_Total=pd.NamedAgg(column="LE", aggfunc="sum"),
             YLL=pd.NamedAgg(column="YLL", aggfunc="sum"))
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        #Calculate Age-Adjusted YLL
        .assign(Std_Pop = lambda df_0: df_0['Std_Pop'].mask(df_0['Pop_N'] == 0, 0), #zero out Std_Pop for each sub-group age-band where sub-group has no subjects (i.e. no AIAN in <25)
            )
        .assign(Age_Adj_YLL_var = lambda df_1: (df_1['Std_Pop']**2)*(df_1['YLL']/(df_1['Pop_N']**2)), #variance for each age_group to be summed for total variance of Race (see WA Health doc)
                w_i = lambda df_1: df_1['Std_Pop']/df_1['Pop_N'], #calc w variabLE for each Age_Group and Race (use max for Race Fay and Freur, and avg for Race Tiwari mod)
                Age_Adj_YLL = lambda df_3: (df_3['YLL']/df_3['Pop_N']) * df_3['Std_Pop'] * 100 #calculate the age-adjusted number of life years (https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
            )
        .assign(w_max = lambda df_2: df_2.groupby(groups)['w_i'].transform('max')) #find max pop weight for Fay and Freur CIs (note use of transform, see here https://stackoverflow.com/questions/35640364/python-pandas-max-value-in-a-group-as-a-new-column
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .assign(w_max = lambda df_3: df_3['w_max']/len(df_.groupby('Age_Group').count())) #was 8 #divide sum of max pop weights by number of age-groups (to re-idnetify the max pop weight for Race)
        .assign(Age_Adj_YLL_CI_lo = lambda df_3: 
                    (df_3['Age_Adj_YLL_var'])/(2*df_3['Age_Adj_YLL']) *
                    chi2.ppf(alpha/2, #alpha 
                        (2*df_3['Age_Adj_YLL']**2)/df_3['Age_Adj_YLL_var']), #shape
                Age_Adj_YLL_CI_hi = lambda df_3: 
                    ((df_3['Age_Adj_YLL_var']+df_3['w_max']**2)/(2*(df_3['Age_Adj_YLL']+df_3['w_max']))) *
                    chi2.ppf(1-alpha/2, # alpha
                        (2*(df_3['Age_Adj_YLL']+df_3['w_max'])**2)/(df_3['Age_Adj_YLL_var']+df_3['w_max']**2)) #shape
            )
        .drop(['LE_Total', 'Pop_N', 'YLL', 'Age_Adj_YLL_var', 'Std_Pop', 'w_i', 'w_max'], axis=1)
        .round(2) #round all numbers to 2 decimals places
        .set_index(groups)
    )

In [None]:
#####
#NOTE: the following code generates CIs that are *means* of the Fay-Feur individual CIs for each RUN
#####

#Overall Stats
print(stats_overall_50.groupby(['Protocol']).mean())
print(stats_race_50.groupby(['Protocol', 'Race']).mean())

with pd.ExcelWriter("MC-50-Table_2_old.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    stats_overall_50.groupby(['Protocol']).mean().to_excel(writer, sheet_name="Overall")
    stats_race_50.groupby(['Protocol', 'Race']).mean().to_excel(writer, sheet_name="Race")
    stats_age_50.groupby(['Protocol', 'Age_Group']).mean().to_excel(writer, sheet_name="Age Group")
    stats_COVID_50.groupby(['Protocol', 'COVID_Status']).mean().to_excel(writer, sheet_name="COVID Status")
    stats_YLL_overall_50.groupby(['Protocol']).mean().to_excel(writer, sheet_name="YLL_Overall")
    stats_YLL_race_50.groupby(['Protocol', 'Race']).mean().to_excel(writer, sheet_name="YLL_Race")
    stats_YLL_age_50.groupby(['Protocol', 'Age_Group']).mean().to_excel(writer, sheet_name="YLL_Age_Group")
    stats_YLL_COVID_50.groupby(['Protocol', 'COVID_Status']).mean().to_excel(writer, sheet_name="YLL_COVID_Status")
    stats_Cho_overall_50.groupby(['Protocol']).mean().to_excel(writer, sheet_name="Cho_Overall")
    stats_Cho_race_50.groupby(['Protocol', 'Race']).mean().to_excel(writer, sheet_name="Cho_Race")
    stats_Cho_age_50.groupby(['Protocol', 'Age_Group']).mean().to_excel(writer, sheet_name="Cho_Age_Group")
    stats_Cho_COVID_50.groupby(['Protocol', 'COVID_Status']).mean().to_excel(writer, sheet_name="Cho_COVID_Status")
'''
print('-'*30, 'All Protocols')
stats_All = (stats_overall_50
    .groupby(['Run','Protocol'])['Survived'].agg(['mean', 'std', 'sem'])
    .assign(ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
            ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem']
    )
)
print(stats_All)
print('-'*30)
'''

In [8]:
##COMBINED
stats_overall_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Run','Protocol']), get_age_adjusted_stats(df_baseline, ['Run','Protocol']),
               get_raw_LE(df_baseline, ['Run','Protocol']), get_age_adjusted_LE(df_baseline, ['Run','Protocol'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol'])],
              get_raw_LE(df_50_NY, ['Run','Protocol']), get_age_adjusted_LE(df_50_NY, ['Run','Protocol']), axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Run','Protocol']), get_age_adjusted_stats(df_50_Age, ['Run','Protocol'])],
              get_raw_LE(df_50_Age, ['Run','Protocol']), get_age_adjusted_LE(df_50_Age, ['Run','Protocol']), axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Run','Protocol']), get_age_adjusted_stats(df_50_Lott, ['Run','Protocol'])],
              get_raw_LE(df_50_Lott, ['Run','Protocol']), get_age_adjusted_LE(df_50_Lott, ['Run','Protocol']), axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Run','Protocol']), get_age_adjusted_stats(df_50_Bhavani, ['Run','Protocol'])],
              get_raw_LE(df_50_Bhavani, ['Run','Protocol']), get_age_adjusted_LE(df_50_Bhavani, ['Run','Protocol']), axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Run','Protocol']), get_age_adjusted_stats(df_50_Colorado, ['Run','Protocol'])],
              get_raw_LE(df_50_Colorado, ['Run','Protocol']), get_age_adjusted_LE(df_50_Colorado, ['Run','Protocol']), axis=1).reset_index()
])
'''
stats_race_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_baseline, ['Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_Age, ['Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_Lott, ['Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_Bhavani, ['Run','Protocol', 'Race'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Run','Protocol', 'Race']), get_age_adjusted_stats(df_50_Colorado, ['Run','Protocol', 'Race'])], axis=1).reset_index()
])

stats_age_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_baseline, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_50_Age, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_50_Lott, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_50_Bhavani, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Run','Protocol', 'Age_Group']), get_age_adjusted_stats(df_50_Colorado, ['Run','Protocol', 'Age_Group'])], axis=1).reset_index()
])

stats_COVID_50 = pd.concat([
    pd.concat([get_raw_stats(df_baseline, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_baseline, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Age, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_50_Age, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Lott, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_50_Lott, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Bhavani, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_50_Bhavani, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index(),
    pd.concat([get_raw_stats(df_50_Colorado, ['Run','Protocol', 'COVID_Status']), get_age_adjusted_stats(df_50_Colorado, ['Run','Protocol', 'COVID_Status'])], axis=1).reset_index()
])
'''

###Use below to convert Stats to excel sheets####

'''
print('-'*30, 'All Protocols')
stats_All = (stats_overall_50
    .groupby(['Run','Protocol'])['Survived'].agg(['mean', 'std', 'sem'])
    .assign(ci95_hi = lambda df_:
            df_['mean'] + 1.96* df_['sem'],
            ci95_lo = lambda df_:
            df_['mean'] - 1.96* df_['sem']
    )
)
print(stats_All)
print('-'*30)
'''
with pd.ExcelWriter("MC-50-results-stats.xlsx") as writer:
# use to_excel function and specify the sheet_name and index
    # to store the dataframe in specified sheet
    stats_overall_50.to_excel(writer, sheet_name="Overall")
#    stats_race_50.to_excel(writer, sheet_name="Race")
#    stats_age_50.to_excel(writer, sheet_name="Age Group")
#    stats_COVID_50.to_excel(writer, sheet_name="COVID Status")

  .groupby(groups, as_index=False).sum()
  .groupby(groups, as_index=False).sum()
  .groupby(groups, as_index=False).sum()
  .groupby(groups, as_index=False).sum()
  pd.concat([get_raw_stats(df_50_NY, ['Run','Protocol']), get_age_adjusted_stats(df_50_NY, ['Run','Protocol'])],


TypeError: concat() got multiple values for argument 'axis'