In [0]:
!pip install lifelines

In [0]:
import pandas as pd, numpy as np, re
from numpy import exp, mean
from fancyimpute import IterativeImputer
from sklearn.ensemble import RandomForestClassifier
from lifelines import CoxPHFitter, AalenJohansenFitter
# import statsmodels.api as sm
# from matplotlib import pyplot as plt
# from resample.bootstrap import bootstrap_ci, bootstrap

In [0]:
# whole cohort (N=1,478,506)
d = pd.read_csv('Fortable3mi_ld_dec_censored.csv')
# Collapse RUCC_2013 into a binary flag: codes 1-3 -> 1, any other code -> 0,
# and a missing code stays missing (NaN) so it can be imputed later.
rucc_missing = d.RUCC_2013.isna()
rucc_in_1_to_3 = d.RUCC_2013.isin([1, 2, 3])
d = (
    d.assign(rucc_metro=np.where(rucc_missing, np.nan, rucc_in_1_to_3.astype(float)))
     .drop(columns=['RUCC_2013'])
)

In [0]:
# Random forest imputation for categorical
# For each target column, an IterativeImputer (with a random-forest classifier
# as its predictor) is fitted on sex_new, age_cat, race_new plus the target.
# Only the target column of the completed matrix is kept; the completed values
# of the other three columns are discarded.
for var in ['dialysis_mod1', 'insurance_esrd', 'rucc_metro']:
    pred = ['sex_new', 'age_cat', 'race_new', var]
    imputer = IterativeImputer(n_iter=1, random_state=7, predictor=RandomForestClassifier(n_estimators=10))
    # fit_transform returns a bare array, so the frame must be given d's index
    # explicitly: DataFrame.join aligns on index labels, and a default 0..n-1
    # index would silently attach values to the wrong rows if d were ever
    # filtered or re-ordered before this cell.
    imputed = pd.DataFrame(imputer.fit_transform(d[pred]), columns=pred, index=d.index)
    # Replace the original column with its imputed version (it moves to the end of d).
    d = d.drop(var, axis=1).join(imputed[var])

In [0]:
# Bayesian Ridge linear imputation for continuous
# For each continuous target, five imputations are drawn with
# sample_posterior=True (seeds 0-4) using IterativeImputer's default predictor,
# and the five completed matrices are averaged element-wise.
for var in ['Hospitalization_Rate_facility', 'Mortality_Rate_Facility', 'NEAR_DIST']:
    # Same for every draw, so build it once per variable instead of once per seed.
    pred = ['sex_new', 'age_cat', 'race_new', var]
    completed = []
    for i in range(5):
        imputer = IterativeImputer(n_iter=5, sample_posterior=True, random_state=i)
        completed.append(imputer.fit_transform(d[pred]))
    completed_mean = np.mean(completed, axis=0)
    # Carry d's index onto the averaged array: DataFrame.join aligns on index
    # labels, so a default 0..n-1 index would misalign rows whenever d's index
    # is not the default range.
    imputed = pd.DataFrame(completed_mean, columns=pred, index=d.index)
    if var == 'NEAR_DIST':
        # The linear model can produce negative values; replace each negative
        # value with the mean of the strictly positive ones.
        m = imputed[imputed.NEAR_DIST > 0].NEAR_DIST.mean()
        imputed.NEAR_DIST = np.where(imputed.NEAR_DIST < 0, m, imputed.NEAR_DIST)
    # Replace the original column with its imputed version (it moves to the end of d).
    d = d.drop(var, axis=1).join(imputed[var])


In [0]:
## Cox model
# Columns carried from the imputed cohort frame into the modelling frame.
cox_columns = [
    'PROVUSRD', 'chain_class2', 'for_profit', 'sex_new', 'age_cat', 'race_new',
    'dialysis_mod1', 'esrd_cause', 'bmi_35', 'ashd_new', 'chf', 'other_cardiac',
    'cva_new', 'pvasc_new', 'hypertension', 'diabetes', 'copd_new', 'smoke_new',
    'cancer_new', 'insurance_esrd', 'PATTXOP_MEDUNFITn', 'network_us_region_dfr',
    'NEAR_DIST', 'rucc_metro', 'wl', 'wl_time', 'livingd', 'ld_time',
    'deceasedt', 'dec_time',
]

# (column, explicit level order or None), listed in the order the indicator
# columns are appended. drop_first=True omits the first level, so where an
# explicit order is given its first entry (5 for age_cat, 6 for chain_class2)
# is the omitted reference level.
dummy_specs = [
    ('dialysis_mod1', None),
    ('insurance_esrd', None),
    ('age_cat', [5, 1, 2, 3, 4, 6]),
    ('race_new', None),
    ('esrd_cause', None),
    ('network_us_region_dfr', None),
    ('chain_class2', [6, 5, 2, 1, 3, 4]),
]

PH_data = d[cox_columns]
for col, levels in dummy_specs:
    if levels is None:
        values = PH_data[col]
    else:
        values = pd.Categorical(PH_data[col], categories=levels, ordered=True)
    PH_data = PH_data.join(pd.get_dummies(values, prefix=col, drop_first=True))

# The raw categorical columns are now represented by their indicators.
PH_data = PH_data.drop([col for col, _ in dummy_specs], axis=1)

In [0]:
# Nested covariate sets, each written as a regex alternation for
# DataFrame.filter(regex=...). Every set extends the previous one and always
# includes the event/duration columns (deceasedt, dec_time).
crude = '|'.join(('chain', 'deceasedt', 'dec_time'))

model1 = '|'.join((crude, 'sex_new', 'age_cat', 'race_new'))

model2 = '|'.join((
    model1,
    'dialysis_mod1', 'esrd_cause', 'bmi_35', 'ashd_new', 'other_cardiac',
    'hypertension', 'diabetes', 'copd_new', 'smoke_new', 'cancer_new',
    'chf', 'cva_new', 'pvasc_new',
))

model3 = '|'.join((
    model2,
    'insurance_esrd', 'network_us_region_dfr', 'NEAR_DIST', 'rucc_metro',
    'PATTXOP_MEDUNFITn',
))


In [0]:
# Fit a Cox proportional-hazards model on the columns selected by the model3
# regex, with dec_time as the duration and deceasedt as the event indicator.
cph = CoxPHFitter()
cph.fit(PH_data.filter(regex=model3), duration_col='dec_time', event_col='deceasedt', step_size=0.5)
# Exponentiate the coefficients and their confidence bounds to report hazard
# ratios, rounded to two decimals. `axis` is passed by keyword because
# pandas >= 2.0 accepts only `objs` positionally in concat; a bare `1` raises
# TypeError there.
print(round(pd.concat([exp(cph.hazards_).rename('HR'), exp(cph.confidence_intervals_)], axis=1), 2))

                           HR  lower-bound  upper-bound
sex_new                  0.83         0.81         0.84
bmi_35                   0.52         0.51         0.53
ashd_new                 0.82         0.78         0.86
chf                      0.64         0.62         0.66
other_cardiac            0.75         0.72         0.79
cva_new                  0.68         0.65         0.72
pvasc_new                0.65         0.62         0.68
hypertension             1.30         1.27         1.34
diabetes                 1.10         1.07         1.13
copd_new                 0.55         0.51         0.59
smoke_new                0.59         0.57         0.62
cancer_new               0.64         0.60         0.68
PATTXOP_MEDUNFITn        0.31         0.27         0.36
NEAR_DIST                1.00         1.00         1.00
rucc_metro               1.09         1.06         1.12
dialysis_mod1_1.0        2.87         2.80         2.94
dialysis_mod1_2.0        1.64         1.56      