In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

from IPython.display import display
from tableone import TableOne
from scipy.stats import chi2_contingency, kruskal

pd.options.display.precision = 3

## Load useful DataFrames

In [2]:
# Adult first ICU stays
df = pd.read_csv('data_collection/icu_first_18.csv', index_col=0)
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'], verify_integrity=True)
df_icu_first_18 = df.drop(columns=df.columns)

# Admission demographics
df = pd.read_csv('data_collection/adm_demographics.csv')
df = df.set_index(['subject_id', 'hadm_id'])
df = df.filter(['age', 'gender', 'ethnicity'])
# df_demographics is in every end-table, so this is an effective filter
df = df.join(df_icu_first_18, how='inner')
df_demographics = df

# Admission demographics for adjustments
df_demo_adj = pd.concat([df['age'],
                         pd.get_dummies(df['gender']),
                         pd.get_dummies(df['ethnicity'])], axis=1)
df_demo_adj = df_demo_adj.drop(['M', 'OTHER'], axis=1)

# Max chloride
df = pd.read_csv('data_collection/chloride.csv')
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'])
df = df.filter(['icu_day', 'chloride_max'])
df = df.query('icu_day <= 7 & chloride_max.notnull()')
df['chl_110'] = (df['chloride_max'] >= 110).astype(int)

# Max chloride of first 7 days
df_chloride = df.filter(['chloride_max'])
df_chloride = df_chloride.groupby(['subject_id', 'hadm_id', 'icustay_id']).max()

# Chloride >= 110 in first 7 days
df_chl_110 = df.filter(['chl_110'])
df_chl_110 = df_chl_110.groupby(['subject_id', 'hadm_id', 'icustay_id']).max()

# Count of chloride >= 110 in first 7 days
df_chl_110_count = df.filter(['chl_110'])
df_chl_110_count = df_chl_110_count.groupby(['subject_id', 'hadm_id', 'icustay_id']).sum()
df_chl_110_count = df_chl_110_count.rename(columns={'chl_110': 'chl_110_count'})

# Average daily chloride load of first 7 days, in 100 mEqs
df = pd.read_csv('data_collection/chloride.csv')
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'])
df = df.filter(['icu_day', 'chloride_input_meq'])
df = df.query('icu_day <= 7 & chloride_input_meq.notnull()')
df = df.groupby(['subject_id', 'hadm_id', 'icustay_id']) \
       .agg({'icu_day': 'max', 'chloride_input_meq': 'sum'})
df['daily_chl_load'] = (df['chloride_input_meq'] / 100) / df['icu_day']
df_chl_load = df.filter(['daily_chl_load'])

# SOFA score on admission (day 1)
df = pd.read_csv('data_collection/sofa_pan.csv')
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'])
df = df.filter(['day', 'sofa'])
df = df.query('day == 1')
df_sofa = df.filter(['sofa'])

# ICU mortality
df = pd.read_csv('data_collection/icu_mort.csv')
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'])
df = df.filter(['intime', 'outtime', 'in_icu_mort'])
df = df.query('intime.notnull() & outtime.notnull()')
df['LOS'] = (pd.to_datetime(df['outtime']) - pd.to_datetime(df['intime'])).dt.days
df_mort = df.filter(['LOS', 'in_icu_mort'])  # need LOS for MODS calculation

# MODS on day 7
sofa_subscores = ['cardiovascular', 'cns', 'coagulation', 'liver', 'renal', 'respiration']
df = pd.read_csv('data_collection/sofa_pan.csv')
df = df.set_index(['subject_id', 'hadm_id', 'icustay_id'])
df = df.filter(['day'] + sofa_subscores)
df = df.query('day == 7')
df['od_sum'] = sum((df[subscore] > 1).astype(int) for subscore in sofa_subscores)
df = df.filter(['od_sum'])
df = df.join(df_mort, how='inner')
df['mods'] = ((df['od_sum'] > 1) | ((df['LOS'] < 7) & (df['in_icu_mort'] == 1))).astype(int)
df_mods = df.filter(['mods'])

df_mort = df_mort.filter(['in_icu_mort'])  # toss LOS since we no longer need it

# New AKI
df = pd.read_csv('data_collection/new_aki.csv', index_col=0)
df = df.set_index(['icustay_id'], verify_integrity=True)
df = df.filter(['dif'])
df['new_aki'] = (-df['dif']).astype(bool).astype(int)  # convert to binary flag
df_aki = df.filter(['new_aki'])

## Define useful functions

In [3]:
def run_kruskal(df_neg, df_pos):
    """Display results of a Kruskal-Wallis H test.
    
    @param df_neg: DataFrame containing the negative samples
    @param df_pos: DataFrame containing the positive samples
    """
    summary = pd.concat([df_neg.describe(), df_pos.describe()], axis=1)
    summary.columns = ['(-)', '(+)']
    display(summary)
    print kruskal(df_neg, df_pos)

def run_chi2(index, columns):
    """Display results of a chi-square test of independence.
    
    @param index: Values to group by in the rows
    @param columns: Values to group by in the columns
    """
    print "Chi-square test of independence with Yates' continuity correction"
    c_table = pd.crosstab(index, columns)
    display(c_table)
    chi2, p, dof, expected = chi2_contingency(c_table)  # correction=True (Yates' correction)
    print "X-squared = %s" % chi2
    print "dof       = %s" % dof
    print "p-value   = %s" % p

def run_glm(df):
    """Display results (odds ratios) of a GLM regression.

    @param df: DataFrame with columns [exog1, exog2, ..., endog]
    """
    endog = df[df.columns[-1]]
    exog = sm.add_constant(df[df.columns[:-1]])
    glm = sm.GLM(endog, exog, family=sm.families.Binomial())  # == sm.Logit(endog, exog)
    result = glm.fit()
    print result.summary()

    odds = pd.concat([result.params, result.conf_int()], axis=1)[1:]  # toss const (first row)
    odds.columns = ['Odds Ratio', '2.5%', '97.5%']
    display(np.exp(odds))

# Table 1. Demographics of Study Population

In [4]:
df_comorbidities = pd.read_csv('data_collection/current_comorbidities.csv', index_col=0)
df_comorbidities = df_comorbidities.set_index(['subject_id', 'hadm_id'], verify_integrity=True)
df_comorbidities = df_comorbidities.drop(columns=['admittime'])

df_combined = df_demographics.join(df_comorbidities, how='inner') \
                             .join(df_chl_110, how='inner') \
                             .join(df_sofa, how='inner') # We don't impute SOFA, so make sure patients have a score

columns = ['age', 'gender', 'ethnicity', 'CANCER', 'CARDIOVASCULAR', 'DIABETES', 'HEPATIC',
           'RENAL', 'RESPIRATORY', 'MULTIPLE COMORBIDITIES']
categorical = ['gender', 'ethnicity', 'CANCER', 'CARDIOVASCULAR', 'DIABETES', 'HEPATIC', 
               'RENAL', 'RESPIRATORY', 'MULTIPLE COMORBIDITIES']
groupby = 'chl_110'
nonnormal = ['age']
display(TableOne(df_combined, columns, categorical, groupby, nonnormal,
                 labels={'age': 'Age', 'gender': 'Gender', 'ethnicity': 'Ethnicity',
                         'chl_110': 'Hyperchloremia (>=110)'},
                 pval=True, isnull=False, label_suffix=True))

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by Hyperchloremia (>=110),Grouped by Hyperchloremia (>=110),Grouped by Hyperchloremia (>=110),Grouped by Hyperchloremia (>=110)
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,pval,ptest
variable,level,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
n,,29893,18181,,
"Age, median [Q1,Q3]",,"65.0 [52.4,77.4]","66.9 [53.8,78.5]",<0.001,Kruskal-Wallis
"Gender, n (%)",F,12684 (42.4),8324 (45.8),<0.001,Chi-squared
"Gender, n (%)",M,17209 (57.6),9857 (54.2),,
"Ethnicity, n (%)",ASIAN,603 (2.0),518 (2.8),<0.001,Chi-squared
"Ethnicity, n (%)",BLACK,3006 (10.1),1614 (8.9),,
"Ethnicity, n (%)",HISPANIC,1053 (3.5),598 (3.3),,
"Ethnicity, n (%)",OTHER,3754 (12.6),2521 (13.9),,
"Ethnicity, n (%)",WHITE,21477 (71.8),12930 (71.1),,
"CANCER, n (%)",0,26805 (89.7),16693 (91.8),<0.001,Chi-squared


# 1a(1): Max Chloride and Mortality

In [5]:
df_combined = df_chloride.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_mort, how='inner')

## - _Univariate association_

In [6]:
run_kruskal(df_combined.query('in_icu_mort == 0').filter(['chloride_max']),
            df_combined.query('in_icu_mort == 1').filter(['chloride_max']))

Unnamed: 0,(-),(+)
count,44516.0,3558.0
mean,107.706,109.971
std,5.879,7.946
min,80.0,74.0
25%,104.0,105.0
50%,108.0,110.0
75%,111.0,115.0
max,155.0,148.0


KruskalResult(statistic=342.3778666040378, pvalue=1.9365057497361326e-76)


## - _Independent association adjusted for day 1 SOFA_

In [7]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            in_icu_mort   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -10820.
Date:                Tue, 03 Sep 2019   Deviance:                       21641.
Time:                        08:33:08   Pearson chi2:                 4.78e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -8.5787      0.323    -26.532      0.000      -9.212      -7.945
chloride_max     0.0341      0.003     11.973 

  return ptp(axis=axis, out=out, **kwargs)


Unnamed: 0,Odds Ratio,2.5%,97.5%
chloride_max,1.035,1.029,1.04
sofa,1.312,1.299,1.326
age,1.02,1.018,1.023
F,1.23,1.142,1.324
ASIAN,0.721,0.567,0.917
BLACK,0.493,0.422,0.577
HISPANIC,0.451,0.347,0.586
WHITE,0.611,0.555,0.672


# 1a(2): Max Chloride and MODS on Day 7

In [8]:
df_combined = df_chloride.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_mods, how='left')
df_combined = df_combined.fillna({'mods':0})

## - _Univariate association_

In [9]:
run_kruskal(df_combined.query('mods == 0').filter(['chloride_max']),
            df_combined.query('mods == 1').filter(['chloride_max']))

Unnamed: 0,(-),(+)
count,46184.0,1890.0
mean,107.75,110.897
std,6.034,6.526
min,74.0,88.0
25%,104.0,107.0
50%,108.0,111.0
75%,111.0,115.0
max,154.0,155.0


KruskalResult(statistic=456.8910904216048, pvalue=2.2826988478487348e-101)


## - _Independent association adjusted for day 1 SOFA_

In [10]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   mods   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -6702.3
Date:                Tue, 03 Sep 2019   Deviance:                       13405.
Time:                        08:33:09   Pearson chi2:                 4.33e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const          -10.0621      0.416    -24.213      0.000     -10.877      -9.248
chloride_max     0.0543      0.004     14.882 

Unnamed: 0,Odds Ratio,2.5%,97.5%
chloride_max,1.056,1.048,1.063
sofa,1.336,1.32,1.353
age,0.994,0.991,0.997
F,0.995,0.901,1.099
ASIAN,0.616,0.432,0.878
BLACK,0.704,0.575,0.861
HISPANIC,0.622,0.454,0.852
WHITE,0.786,0.689,0.896


# 1a(3): Max Chloride and New AKI

In [11]:
df_combined = df_chloride.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_aki, how='left')
df_combined = df_combined.fillna({'new_aki':0})

## - _Univariate association_

In [12]:
run_kruskal(df_combined.query('new_aki == 0').filter(['chloride_max']),
            df_combined.query('new_aki == 1').filter(['chloride_max']))

Unnamed: 0,(-),(+)
count,30841.0,17233.0
mean,107.283,108.93
std,6.182,5.757
min,77.0,74.0
25%,104.0,105.0
50%,107.0,109.0
75%,111.0,112.0
max,154.0,155.0


KruskalResult(statistic=933.3955372579728, pvalue=5.397312007640231e-205)


## - _Independent association adjusted for day 1 SOFA_

In [13]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                new_aki   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -30735.
Date:                Tue, 03 Sep 2019   Deviance:                       61469.
Time:                        08:33:10   Pearson chi2:                 4.79e+04
No. Iterations:                     5   Covariance Type:             nonrobust
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -5.8317      0.184    -31.730      0.000      -6.192      -5.471
chloride_max     0.0483      0.002     29.107 

Unnamed: 0,Odds Ratio,2.5%,97.5%
chloride_max,1.049,1.046,1.053
sofa,0.961,0.955,0.968
age,1.007,1.006,1.008
F,0.904,0.87,0.94
ASIAN,0.77,0.674,0.88
BLACK,0.595,0.548,0.647
HISPANIC,0.743,0.661,0.834
WHITE,0.799,0.756,0.845


# 1b(1): Chloride >= 110 in First 7 Days and Mortality

In [14]:
df_combined = df_chl_110.join(df_sofa, how='inner') \
                        .join(df_demo_adj, how='inner') \
                        .join(df_mort, how='inner')

## - _Univariate association_

In [15]:
run_chi2(df_combined['chl_110'], df_combined['in_icu_mort'])

Chi-square test of independence with Yates' continuity correction


in_icu_mort,0,1
chl_110,Unnamed: 1_level_1,Unnamed: 2_level_1
0,28190,1703
1,16326,1855


X-squared = 334.271766255463
dof       = 1
p-value   = 1.1282589666776486e-74


## - _Independent association adjusted for day 1 SOFA_

In [16]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            in_icu_mort   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -10855.
Date:                Tue, 03 Sep 2019   Deviance:                       21710.
Time:                        08:33:11   Pearson chi2:                 4.83e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -5.0224      0.102    -49.453      0.000      -5.222      -4.823
chl_110        0.3195      0.037      8.572      0.0

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110,1.376,1.28,1.481
sofa,1.315,1.301,1.328
age,1.02,1.018,1.023
F,1.236,1.149,1.331
ASIAN,0.738,0.58,0.937
BLACK,0.486,0.415,0.569
HISPANIC,0.447,0.343,0.581
WHITE,0.608,0.553,0.669


# 1b(2): Chloride >= 110 in First 7 Days and MODS on Day 7

In [17]:
df_combined = df_chl_110.join(df_sofa, how='inner') \
                        .join(df_demo_adj, how='inner') \
                        .join(df_mods, how='left')
df_combined = df_combined.fillna({'mods':0})

## - _Univariate association_

In [18]:
run_chi2(df_combined['chl_110'], df_combined['mods'].astype(int))

Chi-square test of independence with Yates' continuity correction


mods,0,1
chl_110,Unnamed: 1_level_1,Unnamed: 2_level_1
0,29112,781
1,17072,1109


X-squared = 363.05823904988927
dof       = 1
p-value   = 6.076715643844065e-81


## - _Independent association adjusted for day 1 SOFA_

In [19]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   mods   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -6738.0
Date:                Tue, 03 Sep 2019   Deviance:                       13476.
Time:                        08:33:12   Pearson chi2:                 4.37e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -4.4133      0.122    -36.178      0.000      -4.652      -4.174
chl_110        0.6005      0.050     11.966      0.0

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110,1.823,1.652,2.012
sofa,1.338,1.321,1.355
age,0.994,0.991,0.997
F,1.008,0.913,1.113
ASIAN,0.639,0.449,0.909
BLACK,0.693,0.566,0.848
HISPANIC,0.612,0.446,0.838
WHITE,0.781,0.685,0.89


# 1b(3): Chloride >= 110 in First 7 Days and New AKI

In [20]:
df_combined = df_chl_110.join(df_sofa, how='inner') \
                        .join(df_demo_adj, how='inner') \
                        .join(df_aki, how='left')
df_combined = df_combined.fillna({'new_aki':0})

## - _Univariate association_

In [21]:
run_chi2(df_combined['chl_110'], df_combined['new_aki'])

Chi-square test of independence with Yates' continuity correction


new_aki,0.0,1.0
chl_110,Unnamed: 1_level_1,Unnamed: 2_level_1
0,20448,9445
1,10393,7788


X-squared = 620.5708563791441
dof       = 1
p-value   = 5.618211213551841e-137


## - _Independent association adjusted for day 1 SOFA_

In [22]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                new_aki   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -30838.
Date:                Tue, 03 Sep 2019   Deviance:                       61675.
Time:                        08:33:13   Pearson chi2:                 4.80e+04
No. Iterations:                     5   Covariance Type:             nonrobust
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.7940      0.046    -17.231      0.000      -0.884      -0.704
chl_110        0.5186      0.020     25.967      0.0

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110,1.68,1.615,1.747
sofa,0.963,0.956,0.969
age,1.007,1.006,1.008
F,0.906,0.872,0.942
ASIAN,0.776,0.68,0.887
BLACK,0.584,0.538,0.635
HISPANIC,0.733,0.653,0.824
WHITE,0.792,0.749,0.837


# 1c(1): Chloride >= 110 Count and Mortality

In [23]:
df_combined = df_chl_110_count.join(df_sofa, how='inner') \
                              .join(df_demo_adj, how='inner') \
                              .join(df_mort, how='inner')

## - _Univariate association_

In [24]:
run_chi2(df_combined['chl_110_count'], df_combined['in_icu_mort'])

Chi-square test of independence with Yates' continuity correction


in_icu_mort,0,1
chl_110_count,Unnamed: 1_level_1,Unnamed: 2_level_1
0,28190,1703
1,9754,740
2,3182,420
3,1410,222
4,813,156
5,502,114
6,337,91
7,328,112


X-squared = 862.5237238227259
dof       = 7
p-value   = 5.9312819479541215e-182


## - _Independent association adjusted for day 1 SOFA_

In [25]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            in_icu_mort   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -10776.
Date:                Tue, 03 Sep 2019   Deviance:                       21553.
Time:                        08:33:14   Pearson chi2:                 4.71e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -5.0095      0.101    -49.518      0.000      -5.208      -4.811
chl_110_count     0.1706      0.011     15.

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110_count,1.186,1.161,1.211
sofa,1.308,1.294,1.321
age,1.02,1.018,1.022
F,1.234,1.146,1.329
ASIAN,0.734,0.577,0.934
BLACK,0.481,0.411,0.564
HISPANIC,0.448,0.345,0.583
WHITE,0.609,0.553,0.67


# 1c(2): Chloride >= 110 Count and MODS on Day 7

In [26]:
df_combined = df_chl_110_count.join(df_sofa, how='inner') \
                              .join(df_demo_adj, how='inner') \
                              .join(df_mods, how='left')
df_combined = df_combined.fillna({'mods':0})

## - _Univariate association_

In [27]:
run_chi2(df_combined['chl_110_count'], df_combined['mods'].astype(int))

Chi-square test of independence with Yates' continuity correction


mods,0,1
chl_110_count,Unnamed: 1_level_1,Unnamed: 2_level_1
0,29112,781
1,10178,316
2,3413,189
3,1495,137
4,846,123
5,509,107
6,331,97
7,300,140


X-squared = 2058.8939551921194
dof       = 7
p-value   = 0.0


## - _Independent association adjusted for day 1 SOFA_

In [28]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   mods   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -6473.3
Date:                Tue, 03 Sep 2019   Deviance:                       12947.
Time:                        08:33:15   Pearson chi2:                 4.34e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -4.4403      0.122    -36.291      0.000      -4.680      -4.200
chl_110_count     0.3380      0.012     27.

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110_count,1.402,1.369,1.436
sofa,1.325,1.309,1.342
age,0.993,0.99,0.996
F,1.001,0.905,1.107
ASIAN,0.622,0.433,0.894
BLACK,0.683,0.557,0.838
HISPANIC,0.629,0.458,0.864
WHITE,0.784,0.686,0.896


# 1c(3): Chloride >= 110 Count and New AKI

In [29]:
df_combined = df_chl_110_count.join(df_sofa, how='inner') \
                              .join(df_demo_adj, how='inner') \
                              .join(df_aki, how='left')
df_combined = df_combined.fillna({'new_aki':0})

## - _Univariate association_

In [30]:
run_chi2(df_combined['chl_110_count'], df_combined['new_aki'])

Chi-square test of independence with Yates' continuity correction


new_aki,0.0,1.0
chl_110_count,Unnamed: 1_level_1,Unnamed: 2_level_1
0,20448,9445
1,6279,4215
2,1862,1740
3,791,841
4,526,443
5,359,257
6,273,155
7,303,137


X-squared = 792.3112959938028
dof       = 7
p-value   = 8.464843170508035e-167


## - _Independent association adjusted for day 1 SOFA_

In [31]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                new_aki   No. Observations:                48074
Model:                            GLM   Df Residuals:                    48065
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -30985.
Date:                Tue, 03 Sep 2019   Deviance:                       61971.
Time:                        08:33:15   Pearson chi2:                 4.80e+04
No. Iterations:                     5   Covariance Type:             nonrobust
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -0.7012      0.046    -15.355      0.000      -0.791      -0.612
chl_110_count     0.1428      0.007     19.

Unnamed: 0,Odds Ratio,2.5%,97.5%
chl_110_count,1.154,1.137,1.17
sofa,0.965,0.959,0.972
age,1.006,1.005,1.008
F,0.914,0.88,0.95
ASIAN,0.794,0.695,0.907
BLACK,0.576,0.531,0.626
HISPANIC,0.734,0.654,0.824
WHITE,0.791,0.749,0.836


# 2(1): Daily Chloride Load and Mortality

In [32]:
df_combined = df_chl_load.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_mort, how='inner')

## - _Univariate association_

In [33]:
run_kruskal(df_combined.query('in_icu_mort == 0').filter(['daily_chl_load']),
            df_combined.query('in_icu_mort == 1').filter(['daily_chl_load']))

Unnamed: 0,(-),(+)
count,41800.0,3596.0
mean,1.857,2.515
std,1.476,2.557
min,0.0003593,0.0005091
25%,0.816,0.9344
50%,1.569,1.85
75%,2.512,3.159
max,29.5,31.5


KruskalResult(statistic=167.30564169073344, pvalue=2.868587858614148e-38)


## - _Independent association adjusted for day 1 SOFA_

In [34]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:            in_icu_mort   No. Observations:                45398
Model:                            GLM   Df Residuals:                    45389
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -10756.
Date:                Tue, 03 Sep 2019   Deviance:                       21513.
Time:                        08:33:16   Pearson chi2:                 4.62e+04
No. Iterations:                     6   Covariance Type:             nonrobust
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -5.2272      0.104    -50.192      0.000      -5.431      -5.023
daily_chl_load     0.1341      0.010    

Unnamed: 0,Odds Ratio,2.5%,97.5%
daily_chl_load,1.144,1.122,1.165
sofa,1.294,1.281,1.307
age,1.023,1.021,1.026
F,1.254,1.165,1.35
ASIAN,0.757,0.595,0.962
BLACK,0.509,0.434,0.597
HISPANIC,0.468,0.36,0.61
WHITE,0.612,0.557,0.673


# 2(2): Daily Chloride Load and MODS Score on Day 7

In [35]:
df_combined = df_chl_load.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_mods, how='left')
df_combined = df_combined.fillna({'mods':0})

## - _Univariate association_

In [36]:
run_kruskal(df_combined.query('mods == 0').filter(['daily_chl_load']),
            df_combined.query('mods == 1').filter(['daily_chl_load']))

Unnamed: 0,(-),(+)
count,43520.0,1873.0
mean,1.905,2.024
std,1.603,1.493
min,0.0003593,0.003
25%,0.8175,0.961
50%,1.584,1.689
75%,2.549,2.737
max,31.5,15.221


KruskalResult(statistic=24.18139508346573, pvalue=8.767517312571767e-07)


## - _Independent association adjusted for day 1 SOFA_

In [37]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                   mods   No. Observations:                45398
Model:                            GLM   Df Residuals:                    45389
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -6668.3
Date:                Tue, 03 Sep 2019   Deviance:                       13337.
Time:                        08:33:17   Pearson chi2:                 4.25e+04
No. Iterations:                     7   Covariance Type:             nonrobust
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -3.9909      0.124    -32.266      0.000      -4.233      -3.748
daily_chl_load    -0.0837      0.014    

Unnamed: 0,Odds Ratio,2.5%,97.5%
daily_chl_load,0.92,0.894,0.946
sofa,1.357,1.339,1.374
age,0.994,0.991,0.997
F,1.027,0.929,1.134
ASIAN,0.668,0.468,0.952
BLACK,0.678,0.553,0.83
HISPANIC,0.611,0.446,0.839
WHITE,0.782,0.686,0.892


# 2(3): Daily Chloride Load and New AKI

In [38]:
df_combined = df_chl_load.join(df_sofa, how='inner') \
                         .join(df_demo_adj, how='inner') \
                         .join(df_aki, how='left')
df_combined = df_combined.fillna({'new_aki':0})

## - _Univariate association_

In [39]:
run_kruskal(df_combined.query('new_aki == 0').filter(['daily_chl_load']),
            df_combined.query('new_aki == 1').filter(['daily_chl_load']))

Unnamed: 0,(-),(+)
count,28730.0,16671.0
mean,1.926,1.882
std,1.713,1.377
min,0.0003593,0.003
25%,0.77,0.912
50%,1.557,1.635
75%,2.58,2.518
max,31.5,27.302


KruskalResult(statistic=29.56387205244534, pvalue=5.410404267493035e-08)


## - _Independent association adjusted for day 1 SOFA_

In [40]:
run_glm(df_combined)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                new_aki   No. Observations:                45398
Model:                            GLM   Df Residuals:                    45389
Model Family:                Binomial   Df Model:                            8
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -29669.
Date:                Tue, 03 Sep 2019   Deviance:                       59339.
Time:                        08:33:18   Pearson chi2:                 4.54e+04
No. Iterations:                     5   Covariance Type:             nonrobust
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -0.6311      0.049    -12.964      0.000      -0.727      -0.536
daily_chl_load    -0.0052      0.006    

Unnamed: 0,Odds Ratio,2.5%,97.5%
daily_chl_load,0.995,0.983,1.007
sofa,0.975,0.968,0.981
age,1.007,1.006,1.008
F,0.925,0.889,0.961
ASIAN,0.83,0.726,0.95
BLACK,0.615,0.565,0.669
HISPANIC,0.741,0.659,0.834
WHITE,0.792,0.749,0.837
