In [None]:
import pandas as pd
import numpy as np
from tableone import TableOne

In [None]:
import sys
import os, sys
# Add the parent of the current working directory to sys.path before importing
# from the project-local `experiment` package.
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)
from experiment.read_file import read_file

# Read the admissions table; the path is relative to the notebook's directory.
df = pd.read_csv('../experiment/data/mimic4_admissions.csv')
df

In [None]:
# Normalise ethnicity labels to title case (e.g. 'UNKNOWN' -> 'Unknown').
# The vectorised .str accessor passes missing values through as NaN, whereas
# calling x.title() via apply() raises AttributeError on a float NaN.
df['ethnicity'] = df['ethnicity'].str.title()
df['ethnicity'].value_counts()

In [None]:
# Combine 'Unknown' with 'Unable To Obtain' into a single category.
is_unknown = df['ethnicity'].isin(['Unable To Obtain', 'Unknown'])
df.loc[is_unknown, 'ethnicity'] = 'Unknown/Unable to Obtain'
df['ethnicity'].value_counts()

# make a 'tableone' style table

In [None]:
# Work on a copy so the derived outcome column does not leak into `df`.
df_tbl = df.copy()
groups = ['ethnicity', 'gender']

# Text label for the outcome: truthy `y` -> 'Admit', anything else -> 'Discharge'.
df_tbl['Admission Decision'] = df_tbl['y'].map(
    lambda flag: 'Admit' if flag else 'Discharge'
)

# Summary table of the grouping variables, split by admission decision.
mytable = TableOne(
    df_tbl,
    columns=groups,
    categorical=['ethnicity', 'gender'],
    groupby='Admission Decision',
    rename={'ethnicity': 'Ethnicity', 'gender': 'Gender'},
    pval=True,
    pval_adjust='bonferroni',
    row_percent=True,
    overall=False,
    missing=False,
)
display(mytable)
# To export:
# mytable.to_latex('../overleaf/tbls/tableone.tex', column_format='llrrrrl')

In [None]:
import pdb 
# Name of the column holding the 'Admit' / 'Discharge' outcome label.
OUTCOME = 'Admission Decision'


def ccr(x):
    """Format outcome labels as an 'admits/total (percent%)' string.

    Parameters
    ----------
    x : array-like of labels supporting elementwise ``==`` (e.g. a pandas
        Series). Entries equal to 'Admit' are counted as cases.

    Returns
    -------
    str
        ``'<cases>/<count> (<pct>%)'`` where ``pct`` is the share of cases
        rounded to a whole number.
    """
    is_admit = (x == 'Admit')
    n_admit = is_admit.sum()
    n_total = len(x)
    share = n_admit / n_total * 100
    percent = int(round(share, 0))
    return '{}/{} ({}%)'.format(int(n_admit), int(n_total), percent)


def case_control_ratio(x, outcome=OUTCOME):
    """Apply `ccr` to the `outcome` column of `x`.

    Parameters
    ----------
    x : mapping/DataFrame indexable by ``outcome``.
    outcome : key of the label column; defaults to ``OUTCOME``.
    """
    outcome_labels = x[outcome]
    return ccr(outcome_labels)
    



# calculate case/control ratios

In [None]:
# Build the case/control summary: one row per (ethnicity, gender) stratum,
# followed by the per-level margins and finally the grand total.
ccr_col = 'Admit/Discharge (%)'

# Selecting the outcome column before apply() yields a Series indexed by the
# grouping keys, so reset_index(name=...) names the value column explicitly
# instead of relying on apply() producing a column labelled None.
df_ccr = (
    df_tbl.groupby(groups)['Admission Decision']
    .apply(ccr)
    .reset_index(name=ccr_col)
)
display(df_ccr)
# To export the stratum-level table:
# df_ccr.to_latex('../overleaf/tbls/case_count_ratio_intersection.tex')

# Collect the margin rows in a list and concatenate once. DataFrame.append was
# removed in pandas 2.0, and the previous try/except only printed the resulting
# error, so each margin row was dropped without stopping the cell.
margin_rows = []
for g in groups:
    for g2 in groups:
        if g == g2:
            continue
        # Pool over every level of `g`; emit one row per level of `g2`.
        for level, dfg in df_tbl.groupby(g2):
            margin_rows.append({
                g: 'Overall',
                g2: level,
                ccr_col: ccr(dfg['Admission Decision']),
            })

# Grand total across all strata.
margin_rows.append({
    'ethnicity': 'Overall',
    'gender': 'Overall',
    ccr_col: ccr(df_tbl['Admission Decision']),
})

df_ccr = pd.concat(
    [df_ccr, pd.DataFrame(margin_rows, columns=df_ccr.columns)],
    ignore_index=True,
)
df_ccr

In [None]:
df_ccr['ethnicity'].unique()

# Row order for the ethnicity axis of the pivoted table, 'Overall' last.
eth_order = [
    'American Indian/Alaska Native',
    'Asian',
    'Black/African American',
    'Hispanic/Latino',
    'Other',
    'Unknown/Unable to Obtain',
    'White',
    'Overall',
]
# Gender levels, with the 'Overall' margin last.
gen_order = ['F', 'M', 'Overall']

# make intersection prevalence table 

In [None]:

# Relabel the grouping columns for presentation.
df_ccr = df_ccr.rename(
    columns={
        'ethnicity': 'Ethnoracial Group',
        'gender': 'Gender',
    }
)

# Wide layout: one row per ethnoracial group, one column per gender level,
# with rows arranged according to eth_order.
tmp = df_ccr.pivot(
    index='Ethnoracial Group',
    columns='Gender',
    values='Admit/Discharge (%)',
)
tmp = tmp.loc[eth_order]
display(tmp)
# To export:
# tmp.to_latex('../overleaf/tbls/case_control_intersections.tex', column_format='lrrr')