# Preliminaries and Dataframe Construction

In [1]:
# Import modules
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

# Formatting: global matplotlib / seaborn defaults applied to every figure drawn after this cell
plt.rcParams['font.family'] = 'Times New Roman'  # Render all matplotlib text in Times New Roman
plt.rcParams['axes.grid'] = True  # Draw grid lines on every axes by default
sns.set_palette(sns.color_palette('Accent')) # Use seaborn's 'Accent' palette as the default color cycle (https://seaborn.pydata.org/tutorial/color_palettes.html)

In [2]:
#Import Encounters from Database Query
# NOTE(review): pd.read_pickle can execute arbitrary code while loading; only use a trusted encounters.pkl.
# The baseline frame represents the full-capacity scenario: every encounter is marked as
# allocated (Allocated = 1) at Capacity = 1, and Baseline_Surv records the observed outcome.
df_baseline = (pd.read_pickle("encounters.pkl")
    .assign(Run = 1, Capacity = 1, Allocated = 1, Baseline_Surv = lambda df_: df_['Survived'])
    .astype({'Age_Group': 'str'})
)
df_baseline.info()  # info() prints its own report and returns None, so it must not be wrapped in print()

# Per-encounter demographics that are merged onto each simulation output.
# Baseline_Surv and the string Age_Group are already present on df_baseline, so only the
# column selection is needed here.
df_demographics = df_baseline.reindex(columns = [
    'EncounterID',
    'Race',
    'Sex',
    'Age_Group',
    'COVID_Status',
    'Baseline_Surv',
    'LE',
    'Cho_LE']
)

# Simulation results per protocol, tagged with the protocol name and the simulated capacity.
# NOTE(review): if the CSVs also contain an Age_Group column (the converters suggest they may),
# this merge yields Age_Group_x / Age_Group_y instead of Age_Group -- confirm against the files.
MC_ECI_Maryland = (pd.read_csv('MC_ECI_Maryland.csv', converters={'EncounterID':str, 'Age_Group':str})
    .assign(Protocol = "Maryland", Capacity = 0.5)
    .merge(df_demographics, on=['EncounterID'])
)
MC_ECI_Maryland_Age = (pd.read_csv('MC_ECI_Maryland_Age.csv', converters={'EncounterID':str, 'Age_Group':str})
    .assign(Protocol = "Maryland_Age", Capacity = 0.5)
    .merge(df_demographics, on=['EncounterID'])
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3707 entries, 0 to 3706
Data columns (total 30 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   EncounterID       3707 non-null   object 
 1   SubjectID         3707 non-null   object 
 2   Age               3707 non-null   float64
 3   Race              3707 non-null   object 
 4   Ethnicity         3707 non-null   object 
 5   Sex               3707 non-null   object 
 6   InitialSOFA       3707 non-null   int64  
 7   StayLength        3707 non-null   float64
 8   CCS_raw           3707 non-null   int64  
 9   CCS_age           3707 non-null   int64  
 10  CCS_Colorado      3707 non-null   int64  
 11  ECI_raw           3707 non-null   int64  
 12  LE                3707 non-null   float64
 13  Cho_LE            3707 non-null   float64
 14  COVID_Status      3707 non-null   int8   
 15  Elective_Flag     3707 non-null   int64  
 16  Discharge_Status  3707 non-null   int64  


In [3]:
from scipy.stats.distributions import chi2

#DEFINE Raw Stats Calculator#
def get_raw_stats(df_, groups, alpha=0.05):
    """Compute crude (unadjusted) allocation, survival and error statistics per group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        One row per encounter. Must contain 'Baseline_Surv', 'Capacity',
        'Allocated', 'Survived' and every column named in ``groups``.
        Missing values are replaced by 0 before any arithmetic.
    groups : list of str
        Columns to group by; they become the index of the result.
    alpha : float, default 0.05
        Two-sided significance level of the confidence intervals.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``groups`` and rounded to 4 decimals, with columns
        Pop_N, Exp_Surv, Allocated, Survived, FN, FP, Lives_Saved,
        A_rate (+ CI), S_rate (+ CI), FN_rate, FP_rate and LS_rate.

    Notes
    -----
    * Confidence limits are chi-square (exact Poisson) limits on the event
      count divided by Pop_N. The lower limit of a zero count is defined
      as 0 (``chi2.ppf`` itself returns NaN for zero degrees of freedom).
    * FN_rate / FP_rate are NaN (or inf) when their denominator is zero.
    * NOTE(review): Poisson limits are not bounded by Pop_N, so in small
      groups a rate CI can fall outside [0, 1].
    """
    def _count_limits(count):
        """Return (lower, upper) chi-square limits for a Series of event counts."""
        lower = pd.Series(0.5 * chi2.ppf(alpha / 2, 2 * count), index=count.index)
        upper = pd.Series(0.5 * chi2.ppf(1 - alpha / 2, 2 * (count + 1)), index=count.index)
        # Zero shape is undefined for chi2.ppf (NaN); the lower limit of a zero count is 0.
        return lower.mask(count == 0, 0.0), upper

    return (df_
        .fillna(0)
        .assign(Exp_Surv = lambda d: d['Baseline_Surv']*d['Capacity'], #each patient's baseline outcome (1 or 0) times capacity (e.g. 0.5) = expected survival under random allocation
                FN = lambda d: d['Baseline_Surv'].mask(d['Allocated'] == 1, 0), #would have survived at baseline but was not allocated
                FP = lambda d: d['Allocated'].mask(d['Baseline_Surv'] == 1, 0) #was allocated but did not survive even at baseline
            )
        .groupby(groups, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        #Calculate Lives Saved and Allocation Rate
        .assign(Lives_Saved = lambda d: d['Survived']-d['Exp_Surv'],
                A_rate = lambda d: d['Allocated'] / d['Pop_N']
            )
        .assign(A_rate_CI_lo = lambda d: _count_limits(d['Allocated'])[0] / d['Pop_N'],
                A_rate_CI_hi = lambda d: _count_limits(d['Allocated'])[1] / d['Pop_N']
            )
        #Calculate Survival Rate (by first calculating death rate)
        .assign(Deaths = lambda d: d['Pop_N'] - d['Survived'])
        .assign(D_rate = lambda d: d['Deaths'] / d['Pop_N'])
        .assign(D_rate_CI_lo = lambda d: _count_limits(d['Deaths'])[0] / d['Pop_N'],
                D_rate_CI_hi = lambda d: _count_limits(d['Deaths'])[1] / d['Pop_N']
            )
        #Survival is the complement of death, so the CI bounds swap
        .assign(S_rate = lambda d: 1-d['D_rate'],
                S_rate_CI_lo = lambda d: 1-d['D_rate_CI_hi'],
                S_rate_CI_hi = lambda d: 1-d['D_rate_CI_lo'])
        #Calculate FNR, FPR and Lives Saved per Patient
        .assign(FN_rate = lambda d: d['FN']/(d['Pop_N']-d['Allocated']), #per patient NOT allocated
                FP_rate = lambda d: d['FP']/d['Allocated'], #per patient allocated
                LS_rate = lambda d: d['Lives_Saved']/d['Pop_N']
            )
        #Cleanup: the death-rate columns were only intermediates for the survival rate
        .drop(['Deaths', 'D_rate', 'D_rate_CI_hi', 'D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )

#DEFINE Age-Adjusted Calculator#
def get_age_adjusted_stats(df_, groups, alpha=0.05):
    """Compute directly age-standardised survival and error rates per group.

    Rates are first computed inside every (groups x Age_Group) cell, weighted
    by the standard-population share of that age band, and then summed over
    the age bands of each group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        One row per encounter. Must contain 'Age_Group', 'Baseline_Surv',
        'Capacity', 'Allocated', 'Survived' and every column in ``groups``.
        Missing values are replaced by 0. Rows whose Age_Group is not one of
        the standard-population labels are dropped by the inner merge.
    groups : list of str
        Columns to report by; they become the index of the result.
    alpha : float, default 0.05
        Two-sided significance level of the confidence interval.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``groups`` and rounded to 4 decimals, with columns
        Age_Adj_FN_rate, Age_Adj_FP_rate, Age_Adj_LS_rate, Age_Adj_S_rate,
        Age_Adj_S_rate_CI_lo and Age_Adj_S_rate_CI_hi.

    Notes
    -----
    * The survival CI is the complement of a gamma-based (Fay-Feuer) CI on
      the age-adjusted death rate; see
      https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm
    * ``w_max`` is the largest per-age-band weight (Std_Pop / Pop_N) that
      actually occurs inside each group, taken with a group-wise max so it
      stays correct when a group is missing age bands.
    * With zero deaths the lower death-rate limit is defined as 0 (the
      formula itself evaluates to 0/0), so Age_Adj_S_rate_CI_hi is 1.
    * Age bands where a rate denominator is zero contribute nothing to the
      sum (NaN is skipped).
    """
    # Standard-population share of each age band; the first entry is the sum of four
    # narrower bands (presumably the 2000 US standard population -- verify).
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    # Age_Group must always be part of the first (per-age-band) aggregation.
    if ('Age_Group' in groups):
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    per_band = (df_
        .fillna(0)
        .assign(Exp_Surv = lambda d: d['Baseline_Surv']*d['Capacity'], #each patient's baseline outcome (1 or 0) times capacity (e.g. 0.5) = expected survival under random allocation
                FN = lambda d: d['Baseline_Surv'].mask(d['Allocated'] == 1, 0), #would have survived at baseline but was not allocated
                FP = lambda d: d['Allocated'].mask(d['Baseline_Surv'] == 1, 0) #was allocated but did not survive even at baseline
            )
        #totals for each age band (additionally sliced by the other grouping variables, e.g. protocol, run, race)
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Survived", aggfunc="count"),
             Exp_Surv=pd.NamedAgg(column="Exp_Surv", aggfunc="sum"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             Survived=pd.NamedAgg(column="Survived", aggfunc="sum"),
             FN=pd.NamedAgg(column="FN", aggfunc="sum"),
             FP=pd.NamedAgg(column="FP", aggfunc="sum")
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda d: d['Std_Pop'].mask(d['Pop_N'] == 0, 0), #zero out Std_Pop for any age band with no subjects in the sub-group
                Deaths = lambda d: d['Pop_N'] - d['Survived'],
                Lives_Saved = lambda d: d['Survived']-d['Exp_Surv']
            )
        #Weighted per-age-band contributions (crude rate x standard-population share)
        .assign(Age_Adj_D_rate = lambda d: (d['Deaths']/d['Pop_N']) * d['Std_Pop'],
                #per-band variance, summed later into the variance of the adjusted rate
                #(https://doh.wa.gov/sites/default/files/legacy/Documents/1500//ConfIntGuide.pdf)
                Age_Adj_D_var = lambda d: d['Deaths']*((d['Std_Pop']/d['Pop_N'])**2),
                Age_Adj_FN_rate = lambda d: (d['FN']/(d['Pop_N']-d['Allocated'])) * d['Std_Pop'], #FN per non-allocated patient, weighted
                Age_Adj_FP_rate = lambda d: (d['FP']/d['Allocated']) * d['Std_Pop'], #FP per allocated patient, weighted
                Age_Adj_LS_rate = lambda d: (d['Lives_Saved']/d['Pop_N']) * d['Std_Pop'], #lives saved per patient, weighted
            )
        .assign(w_i = lambda d: d['Std_Pop']/d['Pop_N']) #population weight of each age band
    )

    # Largest age-band weight inside each reporting group (needed for the upper Fay-Feuer limit).
    w_max = (per_band
        .groupby(groups, as_index=False)['w_i'].max()
        .rename(columns={'w_i': 'w_max'})
    )

    return (per_band
        #Collapse the age bands: the weighted contributions add up to the age-adjusted rates
        .groupby(groups, as_index=False).sum(numeric_only=True)
        .merge(w_max, on=groups)
        ## Fay-Feuer CIs for the age-adjusted death rate
        .assign(Age_Adj_D_rate_CI_lo = lambda d: (
                    (d['Age_Adj_D_var'])/(2*d['Age_Adj_D_rate']) *
                    chi2.ppf(alpha/2,
                        (2*d['Age_Adj_D_rate']**2)/d['Age_Adj_D_var']) #shape
                    ).mask(d['Age_Adj_D_rate'] == 0, 0.0), #zero deaths: formula is 0/0, lower limit is 0
                Age_Adj_D_rate_CI_hi = lambda d:
                    ((d['Age_Adj_D_var']+d['w_max']**2)/(2*(d['Age_Adj_D_rate']+d['w_max']))) *
                    chi2.ppf(1-alpha/2,
                        (2*(d['Age_Adj_D_rate']+d['w_max'])**2)/(d['Age_Adj_D_var']+d['w_max']**2)) #shape
            )
        #Survival is the complement of death, so the CI bounds swap
        .assign(Age_Adj_S_rate = lambda d: 1-d['Age_Adj_D_rate'],
                Age_Adj_S_rate_CI_lo = lambda d: 1-d['Age_Adj_D_rate_CI_hi'],
                Age_Adj_S_rate_CI_hi = lambda d: 1-d['Age_Adj_D_rate_CI_lo'],
            )
        #Summed counts and death-rate intermediates are not meaningful outputs here
        .drop(['Survived', 'Allocated', 'FN', 'FP', 'Exp_Surv', 'Lives_Saved', 'Pop_N', 'Deaths','Std_Pop','w_i', 'w_max', 'Age_Adj_D_var', 'Age_Adj_D_rate', 'Age_Adj_D_rate_CI_hi', 'Age_Adj_D_rate_CI_lo'], axis=1)
        .round(4)
        .set_index(groups)
    )


In [4]:
# Standard-population share of each age band, one (label, weight) pair per row.
# The '<25' weight is the sum of four narrower bands; the eight weights add up to 1.
# NOTE(review): values look like the 2000 US standard population -- confirm the source.
std_pop = (
    pd.DataFrame(
        [
            ('<25', (0.013818 + 0.055317 + 0.145565 + 0.138646)),
            ('25-34', 0.135573),
            ('35-44', 0.162613),
            ('45-54', 0.134834),
            ('55-64', 0.087247),
            ('65-74', 0.066037),
            ('75-84', 0.044842),
            ('>85', 0.015508),
        ],
        columns=['Age_Group', 'Std_Pop'],
    )
    .astype({'Age_Group': 'category'})
)

std_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Age_Group  8 non-null      category
 1   Std_Pop    8 non-null      float64 
dtypes: category(1), float64(1)
memory usage: 568.0 bytes


In [5]:
from scipy.stats.distributions import chi2

#DEFINE Comorbidity-Adjusted YLL Calculator#
def get_Cho_YLS_stats(df_, groups, alpha=0.05):
    """Compute crude years-of-life statistics per group from the 'Cho_LE' life expectancy.

    Parameters
    ----------
    df_ : pandas.DataFrame
        One row per encounter. Must contain 'Cho_LE', 'Survived',
        'Baseline_Surv', 'Allocated', 'Capacity' and every column in
        ``groups``. Missing values are replaced by 0.
    groups : list of str
        Columns to group by; they become the index of the result.
    alpha : float, default 0.05
        Two-sided significance level of the confidence interval.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``groups`` and rounded to 4 decimals, with columns
        Pop_N, Allocated, LE_Total_Cho, Exp_LE_Cho, YLL_Cho, FN_LE_Cho,
        FP_LE_Cho, YLS_Cho, YLS_Cho_rate, YLS_Cho_CI_lo, YLS_Cho_CI_hi,
        FNR_LE_Cho and FPR_LE_Cho.

    Notes
    -----
    * YLS_Cho = (total life expectancy - years lost by non-survivors)
      - expected life expectancy at the given capacity.
    * The CI is a chi-square (Poisson-type) interval on the YLS_Cho total,
      not on the per-patient rate. The lower limit is defined as 0 when
      YLS_Cho is 0; it remains NaN when YLS_Cho is negative, where the
      chi-square shape is invalid.
    * FNR_LE_Cho / FPR_LE_Cho are NaN (or inf) when their denominator is zero.
    """
    def _total_limits(total):
        """Return (lower, upper) chi-square limits for a Series of totals."""
        lower = pd.Series(0.5 * chi2.ppf(alpha / 2, 2 * total), index=total.index)
        upper = pd.Series(0.5 * chi2.ppf(1 - alpha / 2, 2 * (total + 1)), index=total.index)
        # Zero shape is undefined for chi2.ppf (NaN); the lower limit of a zero total is 0.
        return lower.mask(total == 0, 0.0), upper

    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda d: d['Cho_LE'].mask(d['Survived'] == 1, 0), #life expectancy lost: 0 for survivors, Cho_LE otherwise
                Exp_LE_Cho = lambda d: d['Baseline_Surv']*d['Cho_LE']*d['Capacity'], #Cho_LE x capacity (e.g. 0.5) for baseline survivors, otherwise 0
                FN_LE_Cho = lambda d: (d['Baseline_Surv']*d['Cho_LE']).mask(d['Allocated'] == 1, 0), #Cho_LE of baseline survivors who were not allocated
                FP_LE_Cho = lambda d: (d['Allocated']*d['Cho_LE']).mask(d['Baseline_Surv'] == 1, 0) #Cho_LE of allocated patients who did not survive at baseline
            )
        .groupby(groups, as_index=False)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"), #total life expectancy of everyone, regardless of survival
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"), #life expectancy of baseline survivors, scaled by capacity
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"), #life expectancy lost by those who did not survive the simulation
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #life expectancy tied to false negatives
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #life expectancy tied to false positives
             )
        .assign(YLS_Cho = lambda d: (d['LE_Total_Cho']-d['YLL_Cho'])-d['Exp_LE_Cho'])
        .assign(YLS_Cho_rate = lambda d: d['YLS_Cho']/d['Pop_N'])
        .assign(YLS_Cho_CI_lo = lambda d: _total_limits(d['YLS_Cho'])[0],
                YLS_Cho_CI_hi = lambda d: _total_limits(d['YLS_Cho'])[1]
            )
        #FNR / FPR: years of life tied to errors per patient not allocated (FN) or per patient allocated (FP)
        .assign(FNR_LE_Cho = lambda d: d['FN_LE_Cho']/(d['Pop_N']-d['Allocated']),
                FPR_LE_Cho = lambda d: d['FP_LE_Cho']/d['Allocated']
            )
        .round(4) #round all numbers to 4 decimal places
        .set_index(groups)
    )

#DEFINE Age-Adjusted Comorbidity-Adjusted YLL Calculator#
def get_age_adjusted_Cho_YLS_stats(df_, groups, alpha=0.05):
    """Compute directly age-standardised years-of-life rates per group from 'Cho_LE'.

    Per-patient rates are computed inside every (groups x Age_Group) cell,
    weighted by the standard-population share of that age band, and summed
    over the age bands of each group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        One row per encounter. Must contain 'Age_Group', 'Cho_LE',
        'Survived', 'Baseline_Surv', 'Allocated', 'Capacity' and every
        column in ``groups``. Missing values are replaced by 0. Rows whose
        Age_Group is not one of the standard-population labels are dropped
        by the inner merge.
    groups : list of str
        Columns to report by; they become the index of the result.
    alpha : float, default 0.05
        Accepted for signature parity with the other calculators; no
        confidence interval is computed here, so it is unused.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``groups`` and rounded to 4 decimals, with columns
        Age_Adj_FNR_LE_Cho, Age_Adj_FPR_LE_Cho and Age_Adj_YLS_Cho_rate.
        Age bands where a rate denominator is zero contribute nothing to
        the sum (NaN is skipped).
    """
    # Standard-population share of each age band; the first entry is the sum of four
    # narrower bands (presumably the 2000 US standard population -- verify).
    std_pop = pd.DataFrame({
    'Age_Group': ['<25', '25-34', '35-44', '45-54', '55-64', '65-74', '75-84', '>85'],
    'Std_Pop': [(0.013818 + 0.055317 + 0.145565 + 0.138646), 0.135573, 0.162613, 0.134834, 0.087247, 0.066037, 0.044842, 0.015508]})

    # Age_Group must always be part of the first (per-age-band) aggregation.
    if ('Age_Group' in groups):
        groups_age = groups
    else:
        groups_age = groups + ['Age_Group']

    return (df_
        .fillna(0)
        .assign(YLL_Cho = lambda d: d['Cho_LE'].mask(d['Survived'] == 1, 0), #life expectancy lost: 0 for survivors, Cho_LE otherwise
                Exp_LE_Cho = lambda d: d['Baseline_Surv']*d['Cho_LE']*d['Capacity'], #Cho_LE x capacity for baseline survivors, otherwise 0
                FN_LE_Cho = lambda d: (d['Baseline_Surv']*d['Cho_LE']).mask(d['Allocated'] == 1, 0), #Cho_LE of baseline survivors who were not allocated
                FP_LE_Cho = lambda d: (d['Allocated']*d['Cho_LE']).mask(d['Baseline_Surv'] == 1, 0) #Cho_LE of allocated patients who did not survive at baseline
            )
        .groupby(groups_age, as_index=True)
        .agg(Pop_N=pd.NamedAgg(column="Cho_LE", aggfunc="count"),
             Allocated=pd.NamedAgg(column="Allocated", aggfunc="sum"),
             LE_Total_Cho=pd.NamedAgg(column="Cho_LE", aggfunc="sum"),
             Exp_LE_Cho=pd.NamedAgg(column="Exp_LE_Cho", aggfunc="sum"),
             YLL_Cho=pd.NamedAgg(column="YLL_Cho", aggfunc="sum"),
             FN_LE_Cho=pd.NamedAgg(column="FN_LE_Cho", aggfunc="sum"), #life expectancy tied to false negatives
             FP_LE_Cho=pd.NamedAgg(column="FP_LE_Cho", aggfunc="sum") #life expectancy tied to false positives
             )
        .reset_index()
        .merge(std_pop, on='Age_Group') #bring in standard pop for age-adjustment
        .assign(Std_Pop = lambda d: d['Std_Pop'].mask(d['Pop_N'] == 0, 0)) #zero out Std_Pop for any age band with no subjects in the sub-group
        #Weighted per-age-band contributions (crude per-patient rate x standard-population share)
        #(https://seer.cancer.gov/seerstat/WebHelp/Rate_Algorithms.htm)
        .assign(Age_Adj_FNR_LE_Cho = lambda d: (d['FN_LE_Cho']/(d['Pop_N']-d['Allocated'])) * d['Std_Pop'], #FN life expectancy per non-allocated patient
                Age_Adj_FPR_LE_Cho = lambda d: (d['FP_LE_Cho']/d['Allocated']) * d['Std_Pop'], #FP life expectancy per allocated patient
                Age_Adj_YLL_Cho_rate = lambda d: (d['YLL_Cho']/d['Pop_N']) * d['Std_Pop'], #years of life lost per patient
                Age_Adj_LE_Total_Cho_rate = lambda d: (d['LE_Total_Cho']/d['Pop_N']) * d['Std_Pop'], #total life expectancy per patient
                Age_Adj_Exp_LE_Cho_rate = lambda d: (d['Exp_LE_Cho']/d['Pop_N']) * d['Std_Pop'], #expected life expectancy per patient
            )
        #Collapse the age bands: the weighted contributions add up to the age-adjusted rates
        .groupby(groups, as_index=False).sum(numeric_only=True)
        ##Age-adjusted years of life saved = (total LE - years lost) - expected LE, all as age-adjusted rates
        .assign(Age_Adj_YLS_Cho_rate = lambda d: (d['Age_Adj_LE_Total_Cho_rate']-d['Age_Adj_YLL_Cho_rate'])-d['Age_Adj_Exp_LE_Cho_rate'],
            )
        #Summed counts and intermediate rates are not meaningful outputs here
        .drop(['Pop_N','Allocated','LE_Total_Cho','Exp_LE_Cho', 'YLL_Cho','FN_LE_Cho', 'FP_LE_Cho', 'Std_Pop',
               'Age_Adj_LE_Total_Cho_rate', 'Age_Adj_YLL_Cho_rate',  'Age_Adj_Exp_LE_Cho_rate',
               ], axis=1)
        .round(4) #round all numbers to 4 decimal places
        .set_index(groups)
    )

In [7]:
### Build the summary tables for both ECI protocols and export them to one Excel workbook ###

_keys_overall = ['Protocol', 'ECI_Cutoff', 'Run']
_keys_race = _keys_overall + ['Race']
_eci_frames = [MC_ECI_Maryland, MC_ECI_Maryland_Age]  # row order of every table: Maryland first, then Maryland_Age


def _raw_beside_adjusted(raw_fn, adjusted_fn, frame, keys):
    """Place a raw table and its age-adjusted counterpart side by side (aligned on `keys`)."""
    return pd.concat([raw_fn(frame, keys), adjusted_fn(frame, keys)], axis=1).reset_index()


##################
# Survival Rates #
##################
stats_overall_ECI = pd.concat(
    [get_raw_stats(frame, _keys_overall) for frame in _eci_frames]
).reset_index()

stats_race_ECI = pd.concat(
    [_raw_beside_adjusted(get_raw_stats, get_age_adjusted_stats, frame, _keys_race) for frame in _eci_frames]
)

##################
# Cho YLL Sheets #
##################
stats_Cho_overall_ECI = pd.concat(
    [get_Cho_YLS_stats(frame, _keys_overall) for frame in _eci_frames]
).reset_index()

stats_Cho_race_ECI = pd.concat(
    [_raw_beside_adjusted(get_Cho_YLS_stats, get_age_adjusted_Cho_YLS_stats, frame, _keys_race) for frame in _eci_frames]
)

# One sheet per table; the index is omitted because the grouping keys are already ordinary columns.
with pd.ExcelWriter("MC-ECI-results-stats.xlsx") as writer:
    for _sheet, _table in [
        ("Overall", stats_overall_ECI),
        ("Race", stats_race_ECI),
        ("Cho_Overall", stats_Cho_overall_ECI),
        ("Cho_Race", stats_Cho_race_ECI),
    ]:
        _table.to_excel(writer, sheet_name=_sheet, index=False)
