# JAMA revision round 4

In [0]:
!pip install lifelines

Collecting lifelines
[?25l  Downloading https://files.pythonhosted.org/packages/1f/1b/c6060bf7b89fc8a2e4b14cda99b0ccd778a9ea8c5c0a3baf6f7ef8433961/lifelines-0.21.1-py3-none-any.whl (322kB)
[K     |████████████████████████████████| 327kB 4.6MB/s 
Installing collected packages: lifelines
Successfully installed lifelines-0.21.1


## Import packages

In [0]:
import pandas as pd, numpy as np, re
from numpy import exp, mean
from fancyimpute import IterativeImputer
from sklearn.ensemble import RandomForestClassifier
from lifelines import CoxPHFitter, AalenJohansenFitter, KaplanMeierFitter

In [0]:
# Mount Google Drive at /content/drive in the Colab runtime.
# NOTE(review): the read_csv call further down uses the relative path 'drive/My Drive/...',
# which presumably resolves under this mount only when the working directory is /content -- confirm.
from google.colab import drive
drive.mount('/content/drive')

## **Read data**

In [0]:
# Load the whole cohort (N=1,478,506).
d = pd.read_csv('drive/My Drive/facility/Fortable3mi_ld_dec_censored.csv')

# Collapse RUCC_2013 into a binary flag: codes 1-3 -> 0, any other observed code -> 1,
# missing stays NaN. The source column is dropped once the flag has been derived.
rucc = d['RUCC_2013']
d['rucc_rural'] = (~rucc.isin([1, 2, 3])).astype(float).where(rucc.notna())
d = d.drop(columns='RUCC_2013')

# Disabled: administrative censoring of every outcome at 24 months, and export of the result.
# d.loc[d['wl_time']>24, 'wl']=0
# d.loc[d['wl_time']>24, 'wl_time']=24
# d.loc[d['ld_time']>24, 'livingd']=0
# d.loc[d['ld_time']>24, 'ld_time']=24
# d.loc[d['dec_time']>24, 'deceasedt']=0
# d.loc[d['dec_time']>24, 'dec_time']=24
# d.to_csv('drive/My Drive/facility/jama_round4.csv', index=False)

# **Multiple imputation**


## **Random forest imputation for categorical**

In [0]:
# Single-pass iterative imputation with a random-forest classifier, one categorical
# target at a time, always using sex, age category and race as the predictors.
# The imputed column replaces the original one in `d`; the join is index-based, so it
# relies on `d` still carrying the default 0..n-1 index from read_csv.
demographics = ['sex_new', 'age_cat', 'race_new']
for target in ['dialysis_mod1', 'insurance_esrd', 'rucc_rural', 'nephcare_cat2']:
    columns = demographics + [target]
    rf_imputer = IterativeImputer(
        n_iter=1,
        random_state=7,
        predictor=RandomForestClassifier(n_estimators=10),
    )
    filled = pd.DataFrame(rf_imputer.fit_transform(d[columns]), columns=columns)
    d = d.drop(columns=target).join(filled[target])


## **Bayesian Ridge linear imputation for continuous**


In [0]:
# Continuous targets: draw five posterior-sampled imputations (seeds 0-4) with the
# imputer's default predictor and keep their element-wise mean as the completed column.
demographics = ['sex_new', 'age_cat', 'race_new']
for target in ['Hospitalization_Rate_facility', 'Mortality_Rate_Facility', 'NEAR_DIST']:
    columns = demographics + [target]
    draws = [
        IterativeImputer(n_iter=5, sample_posterior=True, random_state=seed).fit_transform(d[columns])
        for seed in range(5)
    ]
    averaged = pd.DataFrame(np.mean(draws, axis=0), columns=columns)
    if target == 'NEAR_DIST':
        # A negative distance is impossible; replace such values with the mean of the
        # strictly positive ones (zeros are left untouched).
        positive_mean = averaged.loc[averaged['NEAR_DIST'] > 0, 'NEAR_DIST'].mean()
        averaged['NEAR_DIST'] = averaged['NEAR_DIST'].mask(averaged['NEAR_DIST'] < 0, positive_mean)
    d = d.drop(columns=target).join(averaged[target])

# **Create cohort for Cox model**

**1) dummy code and order levels based on table**

**2) drop unneeded variables**

In [0]:
def _dummy_code(frame, specs):
    """Append drop-first dummy columns for each spec, then drop the source columns.

    `specs` is a sequence of (column, levels) pairs. When `levels` is a list, the column is
    first turned into an ordered Categorical with exactly that level order, so the first
    listed level becomes the dropped reference category. When `levels` is None, the dummies
    come straight from the raw column. Dummies are appended in the order given.
    """
    coded = frame
    for column, levels in specs:
        if levels is None:
            source = frame[column]
        else:
            source = pd.Categorical(frame[column], categories=levels, ordered=True)
        coded = coded.join(pd.get_dummies(source, prefix=column, drop_first=True))
    return coded.drop([column for column, _ in specs], axis=1)


# Columns common to both cohorts, split around the slot where the standard cohort
# additionally carries profit_txc / profit_hosp.
_head_cols = ['PROVUSRD', 'chain_class2', 'for_profit', 'sex_new', 'age_cat', 'race_new', 'dialysis_mod1', 'esrd_cause', 'bmi_35',
              'ashd_new', 'chf', 'other_cardiac', 'cva_new', 'pvasc_new', 'hypertension', 'diabetes', 'copd_new',
              'smoke_new', 'cancer_new', 'insurance_esrd', 'PATTXOP_MEDUNFITn', 'nephcare_cat2']
_tail_cols = ['network_us_region_dfr', 'NEAR_DIST', 'rucc_rural', 'wl', 'wl_time', 'livingd', 'ld_time', 'deceasedt', 'dec_time']

# standard cohort
PH_data = _dummy_code(
    d[_head_cols + ['profit_txc', 'profit_hosp'] + _tail_cols],
    [('dialysis_mod1', None),
     ('insurance_esrd', [3, 2, 1, 4, 5]),
     ('age_cat', [5, 1, 2, 3, 4, 6]),
     ('race_new', None),
     ('esrd_cause', None),
     ('network_us_region_dfr', None),
     ('chain_class2', [6, 5, 2, 1, 3, 4]),
     ('profit_txc', None),
     ('profit_hosp', None)],
)

# ideal cohort: age < 66 with none of pvasc / chf / cva / medically-unfit flags set
ideal = d[(d.INC_AGE < 66) &
          (d.pvasc_new == 0) &
          (d.chf == 0) &
          (d.cva_new == 0) &
          (d.PATTXOP_MEDUNFITn == 0)].reset_index(drop=True)

PH_data_ideal = _dummy_code(
    ideal[_head_cols + _tail_cols],
    [('dialysis_mod1', None),
     ('insurance_esrd', [3, 2, 1, 4, 5]),
     ('age_cat', [5, 1, 2, 3, 4]),  # age category 6 is not a level in the ideal cohort
     ('race_new', None),
     ('esrd_cause', None),
     ('network_us_region_dfr', None),
     ('chain_class2', [6, 5, 2, 1, 3, 4])],
)


## **Table 2**

In [0]:
# Exposures for the crude (single-covariate) Cox models of Table 2, in table order.
varlist = [
    # demographics and coverage
    'age_cat', 'sex_new', 'race_new', 'insurance_esrd',
    # ESRD cause and dialysis modality
    'esrd_cause', 'dialysis_mod1',
    # clinical covariates
    'bmi_35', 'chf', 'ashd_new', 'other_cardiac', 'cva_new', 'pvasc_new',
    'hypertension', 'diabetes', 'copd_new', 'smoke_new', 'cancer_new',
    # care and geography
    'nephcare_cat2', 'PATTXOP_MEDUNFITn', 'network_us_region_dfr', 'rucc_rural', 'NEAR_DIST',
]


In [0]:
# Table 2: crude hazard ratios. For every outcome, fit one Cox model per exposure with
# that exposure as the only covariate and print HR with its confidence interval.
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    print('-'*30, status,'-'*30)
    for exposure in varlist:
        # Columns are selected by regex, so a dummy-coded exposure pulls in all of its
        # `<exposure>_*` columns along with the duration and event columns.
        crude = '|'.join([exposure, time, status])
        cph.fit(PH_data.filter(regex=crude), duration_col=time, event_col=status, step_size=0.5)
        hazard_ratio = exp(cph.hazards_).rename('HR')
        ci_bounds = exp(cph.confidence_intervals_)
        # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
        print(round(pd.concat([hazard_ratio, ci_bounds], axis=1), 2))

In [0]:
# For each outcome, print "events /person-years" per exposure level: first by
# chain_class2, then by for_profit. Follow-up time is divided by 12 to give years.
for case, time in zip(['wl', 'livingd', 'deceasedt'], ['wl_time', 'ld_time', 'dec_time']):
    for exposure in ['chain_class2', 'for_profit']:
        by_level = d.groupby(exposure)
        events = by_level[case].sum().map('{:,}'.format)
        person_years = (by_level[time].sum()/12).map(' /{:,.0f}'.format)
        print(events + person_years)


## **Table 3**

In [0]:
# Setup for the Poisson rate-difference cells: statsmodels fits the GLMs, and the R
# package `msm` (reached through rpy2) supplies `deltamethod` for the standard errors.
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
import statsmodels.formula.api as smf
import statsmodels.api as sm
import rpy2.robjects.numpy2ri
# Turn on rpy2's numpy conversion before any array is handed to R.
rpy2.robjects.numpy2ri.activate()
utils = importr('utils')
# Unconditional: this asks R to install `msm` every time the cell runs.
utils.install_packages('msm')
msm = importr('msm')


In [0]:
# Truncate follow-up time
def truncate(t, data=None):
    """Return a copy of the cohort with follow-up administratively censored at `t`.

    For each (time, event) pair -- (wl_time, wl), (ld_time, livingd), (dec_time, deceasedt) --
    rows whose time exceeds `t` have the event indicator reset to 0 and the time capped
    at `t`. Rows with time <= `t` are left unchanged.

    Parameters
    ----------
    t : number
        Cutoff in the same unit as the time columns. Values <= 0 disable truncation and
        an unmodified copy is returned.
    data : pandas.DataFrame, optional
        Frame to truncate. Defaults to the notebook-level cohort `d`, which keeps
        existing `truncate(t)` calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is never modified.
    """
    e = (d if data is None else data).copy()
    if t > 0:
        for time_col, event_col in (('wl_time', 'wl'), ('ld_time', 'livingd'), ('dec_time', 'deceasedt')):
            # Compute the mask once, before the time column is capped.
            beyond_cutoff = e[time_col] > t
            e.loc[beyond_cutoff, event_col] = 0
            e.loc[beyond_cutoff, time_col] = t
    return e

# Poisson rate differences, for-profit vs. non-profit.
#
# The truncated cohort is aggregated to event and follow-up totals per
# (for_profit, age_cat, sex_new, race_new) cell. That table depends only on the cutoff,
# so it is built once per cutoff rather than once per (outcome, cutoff) pair; this avoids
# re-copying and re-grouping the full cohort for every outcome.
cutoffs = [0, 12, 36, 60]  # a cutoff of 0 means "no truncation" (see truncate)
poissonsets = {}
for cutoff in cutoffs:
    aggregated = truncate(cutoff).groupby(['for_profit', 'age_cat', 'sex_new', 'race_new'], as_index=False)[['wl', 'wl_time', 'livingd', 'ld_time', 'dec_time', 'deceasedt']].sum()
    # The first listed level is the reference category in the model formula.
    aggregated['age_cat'] = pd.Categorical(aggregated['age_cat'], ordered=True, categories=[5, 1, 2, 3, 4, 6])
    aggregated['race_new'] = pd.Categorical(aggregated['race_new'], ordered=True, categories=[1, 2, 3, 4])
    poissonsets[cutoff] = aggregated

for event, time in zip(['wl', 'livingd', 'deceasedt'], ['wl_time', 'ld_time', 'dec_time']):
    for cutoff in cutoffs:
        poissonset = poissonsets[cutoff]

        # poisson regression; the offset is log(follow-up time / 12)
        poisson_model = smf.glm(event+'~for_profit+sex_new+age_cat+race_new',
                                data=poissonset,
                                family=sm.families.Poisson(),
                                offset=np.log(poissonset[time]/12)).fit(method='newton')

        # extract coefficients
        b0 = poisson_model.params['Intercept']
        b1 = poisson_model.params['for_profit']

        # Rate difference at the model baseline, exp(b0 + b1) - exp(b0), with a
        # delta-method standard error from R's msm::deltamethod.
        effect = np.exp(b0 + b1) - np.exp(b0)
        vcov = poisson_model.cov_params().loc[['Intercept', 'for_profit'], ['Intercept', 'for_profit']]
        se = msm.deltamethod(ro.Formula('~exp(x1+x2)-exp(x1)'), FloatVector([b0, b1]), ro.r.matrix(vcov.values, nrow=2, ncol=2))
        # deltamethod returns a length-1 vector; take the scalar explicitly instead of
        # relying on array-to-float conversion, which newer numpy deprecates.
        se_value = float(np.asarray(se).ravel()[0])
        print("%s %g-months for_profit vs. non_profit: %.2f (%.2f, %.2f) " %(event, cutoff, effect*100, (effect-1.96*se_value)*100, (effect+1.96*se_value)*100))
    
    


wl 0-months for_profit vs. non_profit: -4.26 (-4.35, -4.17) 
wl 12-months for_profit vs. non_profit: -9.62 (-9.89, -9.36) 
wl 36-months for_profit vs. non_profit: -6.75 (-6.90, -6.61) 
wl 60-months for_profit vs. non_profit: -5.38 (-5.49, -5.27) 
livingd 0-months for_profit vs. non_profit: -0.54 (-0.57, -0.51) 
livingd 12-months for_profit vs. non_profit: -0.74 (-0.80, -0.67) 
livingd 36-months for_profit vs. non_profit: -0.76 (-0.80, -0.71) 
livingd 60-months for_profit vs. non_profit: -0.64 (-0.67, -0.60) 
deceasedt 0-months for_profit vs. non_profit: -0.98 (-1.02, -0.95) 
deceasedt 12-months for_profit vs. non_profit: -0.23 (-0.26, -0.19) 
deceasedt 36-months for_profit vs. non_profit: -0.80 (-0.84, -0.75) 
deceasedt 60-months for_profit vs. non_profit: -1.08 (-1.13, -1.03) 


In [0]:
# Poisson rate differences for each chain class vs. the reference class (6).
#
# The aggregated table depends only on the cutoff, so it is built once per cutoff rather
# than once per (outcome, cutoff) pair; this avoids re-copying and re-grouping the full
# cohort for every outcome.
cutoffs = [0, 12, 36, 60]  # a cutoff of 0 means "no truncation" (see truncate)
poissonsets = {}
for cutoff in cutoffs:
    aggregated = truncate(cutoff).groupby(['chain_class2', 'age_cat', 'sex_new', 'race_new'], as_index=False)[['wl', 'wl_time', 'livingd', 'ld_time', 'dec_time', 'deceasedt']].sum()
    # The first listed level is the reference category in the model formula.
    aggregated['chain_class2'] = pd.Categorical(aggregated['chain_class2'], ordered=True, categories=[6, 5, 2, 1, 3, 4])
    aggregated['age_cat'] = pd.Categorical(aggregated['age_cat'], ordered=True, categories=[5, 1, 2, 3, 4, 6])
    aggregated['race_new'] = pd.Categorical(aggregated['race_new'], ordered=True, categories=[1, 2, 3, 4])
    poissonsets[cutoff] = aggregated

for event, time in zip(['wl', 'livingd', 'deceasedt'], ['wl_time', 'ld_time', 'dec_time']):
    for cutoff in cutoffs:
        poissonset = poissonsets[cutoff]

        # poisson regression; the offset is log(follow-up time / 12)
        poisson_model = smf.glm(event+'~chain_class2+sex_new+age_cat+race_new',
                                data=poissonset,
                                family=sm.families.Poisson(),
                                offset=np.log(poissonset[time]/12)).fit(method='newton')

        # The intercept and covariance matrix are shared by all contrasts of this fit.
        b0 = poisson_model.params['Intercept']
        full_cov = poisson_model.cov_params()

        # extract coefficient for each non-reference chain class
        for i in [5, 2, 1, 3, 4]:
            term = 'chain_class2[T.'+str(i)+']'
            b1 = poisson_model.params[term]
            # Rate difference at the model baseline with a delta-method standard error.
            effect = np.exp(b0 + b1) - np.exp(b0)
            vcov = full_cov.loc[['Intercept', term], ['Intercept', term]]
            se = msm.deltamethod(ro.Formula('~exp(x1+x2)-exp(x1)'), FloatVector([b0, b1]), ro.r.matrix(vcov.values, nrow=2, ncol=2))
            # Take the scalar explicitly; array-to-float conversion is deprecated in newer numpy.
            se_value = float(np.asarray(se).ravel()[0])
            print("%s %g-months chain_%g: %.2f (%.2f, %.2f) " %(event, cutoff, i, effect*100, (effect-1.96*se_value)*100, (effect+1.96*se_value)*100))

wl 0-months chain_5: 6.17 (6.00, 6.34) 
wl 0-months chain_2: -1.68 (-1.76, -1.61) 
wl 0-months chain_1: -1.88 (-1.96, -1.80) 
wl 0-months chain_3: -1.75 (-1.84, -1.67) 
wl 0-months chain_4: -1.53 (-1.63, -1.44) 
wl 12-months chain_5: 11.50 (11.04, 11.96) 
wl 12-months chain_2: -4.60 (-4.83, -4.38) 
wl 12-months chain_1: -4.89 (-5.12, -4.66) 
wl 12-months chain_3: -4.73 (-4.96, -4.50) 
wl 12-months chain_4: -4.27 (-4.52, -4.01) 
wl 36-months chain_5: 9.18 (8.91, 9.45) 
wl 36-months chain_2: -2.85 (-2.98, -2.73) 
wl 36-months chain_1: -3.13 (-3.25, -3.00) 
wl 36-months chain_3: -2.97 (-3.10, -2.84) 
wl 36-months chain_4: -2.67 (-2.82, -2.52) 
wl 60-months chain_5: 7.58 (7.37, 7.80) 
wl 60-months chain_2: -2.20 (-2.29, -2.10) 
wl 60-months chain_1: -2.43 (-2.53, -2.33) 
wl 60-months chain_3: -2.30 (-2.40, -2.20) 
wl 60-months chain_4: -2.03 (-2.14, -1.91) 
livingd 0-months chain_5: 0.47 (0.42, 0.52) 
livingd 0-months chain_2: -0.32 (-0.35, -0.28) 
livingd 0-months chain_1: -0.32 (-0.36, -

In [0]:
# Table 3: hazard ratios for chain class and for-profit status on the standard cohort,
# under four nested covariate sets (crude, model 1, model 2, model 3).
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    print('-'*30, status,'-'*30)
    for exposure in ['chain', 'for_profit']:
        # Each "model" is a regex alternation used to select columns of PH_data.
        crude = '|'.join([exposure, time, status])
        model1 = crude + '|sex_new|age_cat|race_new'
        # chf, cva_new and pvasc_new are not part of model 2 here.
        model2 = model1 + ('|dialysis_mod1|esrd_cause|bmi_35|ashd_new|other_cardiac|hypertension|diabetes|'
                           'copd_new|smoke_new|cancer_new')
        model3 = model2 + '|insurance_esrd|network_us_region_dfr|NEAR_DIST|rucc_rural|PATTXOP_MEDUNFITn'
        for i, model in enumerate([crude, model1, model2, model3]):
            print('\n', 'model_'+str(i))
            cph.fit(PH_data.filter(regex=model), duration_col=time, event_col=status, step_size=0.5)
            # Report only the exposure's coefficients, not the adjustment covariates.
            hazard_ratio = exp(cph.hazards_[cph.hazards_.index.str.contains(exposure)]).rename('HR')
            ci_bounds = exp(cph.confidence_intervals_[cph.confidence_intervals_.index.str.contains(exposure)])
            # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
            print(round(pd.concat([hazard_ratio, ci_bounds], axis=1), 2))




## **Figure 2**

In [0]:
# Figure 2: events per 100 person-years by exposure level, for two-year windows of
# FIRST_SE (2001-02, 2003-04, ..., 2015-16).
d['FIRST_SE'] = pd.to_datetime(d['FIRST_SE'], format='%m/%d/%Y')

# Build each window's label and row mask once; they do not depend on outcome or exposure.
# FIRST_SE is parsed from date-only strings, so a year test is equivalent to comparing
# against year/01/01 .. (year+1)/12/31.
first_se_year = d['FIRST_SE'].dt.year
windows = [
    (str(year)+'-'+"{0:0>2}".format(year+1-2000), first_se_year.between(year, year+1))
    for year in range(2001, 2016, 2)
]

for event, time in zip(['wl', 'livingd', 'deceasedt'], ['wl_time', 'ld_time', 'dec_time']):
    print('-'*40, event, '-'*40)
    for exposure in ['chain_class2', 'for_profit']:
        print('\n', exposure)
        rates = []
        for label, in_window in windows:
            by_level = d[in_window].groupby(exposure)
            num = by_level[event].sum()
            pyr = by_level[time].sum()/12
            rates.append((num/pyr*100).rename(label))
        # Concatenate once instead of growing a DataFrame inside the loop.
        fig2 = pd.concat(rates, axis=1)
        # Column-wise Series.map formats every cell and works on pandas versions where
        # DataFrame.applymap is deprecated.
        print(fig2.apply(lambda column: column.map('{:.2f}'.format)))


## **Supplemental Table 3 (ideal cohort)**

In [0]:
# Supplemental Table 3: the same nested Cox models as Table 3, fitted on the ideal cohort.
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    print('-'*30, status,'-'*30)
    for exposure in ['chain', 'for_profit']:
        # Each "model" is a regex alternation used to select columns of PH_data_ideal.
        crude = '|'.join([exposure, time, status])
        model1 = crude + '|sex_new|age_cat|race_new'
        # chf, cva_new, pvasc_new and PATTXOP_MEDUNFITn are left out of every model: the
        # ideal cohort is restricted to rows where all four equal 0, so they are constant.
        model2 = model1 + ('|dialysis_mod1|esrd_cause|bmi_35|ashd_new|other_cardiac|hypertension|diabetes|'
                           'copd_new|smoke_new|cancer_new')
        model3 = model2 + '|insurance_esrd|network_us_region_dfr|NEAR_DIST|rucc_rural'
        for i, model in enumerate([crude, model1, model2, model3]):
            print('\n', 'model'+str(i))
            cph.fit(PH_data_ideal.filter(regex=model), duration_col=time, event_col=status, step_size=0.5)
            # Report only the exposure's coefficients, not the adjustment covariates.
            hazard_ratio = exp(cph.hazards_[cph.hazards_.index.str.contains(exposure)]).rename('HR')
            ci_bounds = exp(cph.confidence_intervals_[cph.confidence_intervals_.index.str.contains(exposure)])
            # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
            print(round(pd.concat([hazard_ratio, ci_bounds], axis=1), 2))



## **Supplemental Table 4**

In [0]:
# Supplemental Table 4: fully adjusted (model 3) hazard ratios, fitted separately
# within each region.
# Create the fitter here so the cell does not depend on a `cph` left over from an earlier cell.
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    print('-'*30, status,'-'*30)
    for r, region in enumerate(['Northeast', 'South', 'Midwest', 'West']):
        print('\n', region)
        # NOTE(review): this assumes network_us_region_dfr is coded 0..3 in the order
        # listed above. If the codes are 1..4, code 0 selects no rows and the last
        # region is never fitted -- confirm against the data dictionary.
        # The mask comes from `d` and is aligned to PH_data by index; it is the same
        # for both exposures, so compute it once per region.
        in_region = d['network_us_region_dfr']==r
        region_data = PH_data[in_region]
        for exposure in ['chain', 'for_profit']:
            crude = '|'.join([exposure, time, status])
            model1 = crude + '|sex_new|age_cat|race_new'
            model2 = model1 + ('|dialysis_mod1|esrd_cause|bmi_35|ashd_new|other_cardiac|hypertension|diabetes|'
                               'copd_new|smoke_new|cancer_new|chf|cva_new|pvasc_new')
            # The region dummies are omitted on purpose: region is constant within a stratum.
            model3 = model2 + '|insurance_esrd|NEAR_DIST|rucc_rural|PATTXOP_MEDUNFITn'
            cph.fit(region_data.filter(regex=model3), duration_col=time, event_col=status, step_size=0.5)
            # Report only the exposure's coefficients, not the adjustment covariates.
            hazard_ratio = exp(cph.hazards_[cph.hazards_.index.str.contains(exposure)]).rename('HR')
            ci_bounds = exp(cph.confidence_intervals_[cph.confidence_intervals_.index.str.contains(exposure)])
            # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
            print(round(pd.concat([hazard_ratio, ci_bounds], axis=1), 2))


## Supplemental Table 5 & 6

In [0]:
# Supplemental Tables 5 & 6: nested Cox models with profit_txc and profit_hosp as the
# exposures, fitted on the standard cohort.
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    print('-'*30, status,'-'*30)
    for exposure in ['profit_txc', 'profit_hosp']:
        # Each "model" is a regex alternation used to select columns of PH_data.
        crude = '|'.join([exposure, time, status])
        model1 = crude + '|sex_new|age_cat|race_new'
        model2 = model1 + ('|dialysis_mod1|esrd_cause|bmi_35|ashd_new|other_cardiac|hypertension|diabetes|'
                           'copd_new|smoke_new|cancer_new|chf|cva_new|pvasc_new')
        model3 = model2 + '|insurance_esrd|NEAR_DIST|rucc_rural|PATTXOP_MEDUNFITn|network_us_region_dfr'
        for i, model in enumerate([crude, model1, model2, model3]):
            print('\n', 'model_'+str(i))
            # This cell uses step_size=0.2, where the other Cox cells use 0.5.
            cph.fit(PH_data.filter(regex=model), duration_col=time, event_col=status, step_size=0.2)
            # Report only the exposure's coefficients, not the adjustment covariates.
            hazard_ratio = exp(cph.hazards_[cph.hazards_.index.str.contains(exposure)]).rename('HR')
            ci_bounds = exp(cph.confidence_intervals_[cph.confidence_intervals_.index.str.contains(exposure)])
            # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally.
            print(round(pd.concat([hazard_ratio, ci_bounds], axis=1), 2))

## Additional: Cumulative incidence difference

In [46]:
# Cumulative incidence difference, for-profit vs. non-profit, at 1, 3 and 5 years.
# Event coding for the Aalen-Johansen fits: 1 = event of interest; 2 = competing event,
# assigned wherever the matching death_* flag equals 1.
cif = d.copy()
cif.loc[cif['death_wl']==1, 'wl']=2
cif.loc[cif['death_ld']==1, 'livingd']=2
cif.loc[cif['death_dec']==1, 'deceasedt']=2
d_p = cif[cif.for_profit==1]
d_np = cif[cif.for_profit==0]

for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    # The fitter breaks tied event times with random jitter (see the warning it prints);
    # a fixed seed makes the estimates reproducible from run to run.
    ajf_p = AalenJohansenFitter(calculate_variance=False, seed=7).fit(d_p[time]/12, d_p[status], 1)
    ajf_np = AalenJohansenFitter(calculate_variance=False, seed=7).fit(d_np[time]/12, d_np[status], 1)
    for t in [1, 3, 5]:
        # Last estimate at or before year t, taken as a plain float rather than a (1, 1)
        # array so the %-formatting below does not rely on array-to-scalar conversion.
        cif_p = float(ajf_p.cumulative_density_.loc[slice(t)].tail(1).values.ravel()[0])
        cif_np = float(ajf_np.cumulative_density_.loc[slice(t)].tail(1).values.ravel()[0])
        cif_diff = cif_p - cif_np
        # NOTE(review): binomial standard error using the full group sizes; it does not
        # account for censoring -- confirm this approximation is intended.
        se = np.sqrt(cif_p * (1-cif_p) / len(d_p) +cif_np * (1-cif_np) / len(d_np))
        print('%s %g-year profit vs. non-profit: %.1f%% (%.1f%%, %.1f%%)'
              %(status, t, cif_diff*100, (cif_diff -1.96 *se)*100, (cif_diff+1.96*se )*100))

                To resolve ties, data is randomly jittered.


wl 1-year profit vs. non-profit: -8.0% (-8.2%, -7.9%)
wl 3-year profit vs. non-profit: -12.5% (-12.7%, -12.3%)
wl 5-year profit vs. non-profit: -13.2% (-13.4%, -13.0%)
livingd 1-year profit vs. non-profit: -1.0% (-1.0%, -0.9%)
livingd 3-year profit vs. non-profit: -2.1% (-2.1%, -2.0%)
livingd 5-year profit vs. non-profit: -2.3% (-2.4%, -2.3%)
deceasedt 1-year profit vs. non-profit: -0.4% (-0.4%, -0.3%)
deceasedt 3-year profit vs. non-profit: -2.3% (-2.4%, -2.2%)
deceasedt 5-year profit vs. non-profit: -4.3% (-4.4%, -4.2%)


In [52]:
# Cumulative incidence difference of each chain class vs. class 6, at 1, 3 and 5 years.
# `cif` is the competing-risk-coded copy of the cohort built in the previous cell.
d_c1 = cif[cif.chain_class2==1]
d_c2 = cif[cif.chain_class2==2]
d_c3 = cif[cif.chain_class2==3]
d_c4 = cif[cif.chain_class2==4]
d_c5 = cif[cif.chain_class2==5]
d_c6 = cif[cif.chain_class2==6]


for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
    def _fit_cif(group):
        """Aalen-Johansen fit for event code 1 on `group`, with time converted to years."""
        # The fitter breaks tied event times with random jitter; a fixed seed makes the
        # estimates reproducible from run to run.
        return AalenJohansenFitter(calculate_variance=False, seed=7).fit(group[time]/12, group[status], 1)

    ajf_c1 = _fit_cif(d_c1)
    ajf_c2 = _fit_cif(d_c2)
    ajf_c3 = _fit_cif(d_c3)
    ajf_c4 = _fit_cif(d_c4)
    ajf_c5 = _fit_cif(d_c5)
    ajf_c6 = _fit_cif(d_c6)
    for c, d_len, n in zip([ajf_c5, ajf_c2, ajf_c1, ajf_c3, ajf_c4], [d_c5, d_c2, d_c1, d_c3, d_c4], [5, 2, 1, 3, 4]):
        for t in [1, 3, 5]:
            # Last estimate at or before year t, taken as a plain float rather than a
            # (1, 1) array so the %-formatting does not rely on array-to-scalar conversion.
            cif_c6 = float(ajf_c6.cumulative_density_.loc[slice(t)].tail(1).values.ravel()[0])
            cif_c = float(c.cumulative_density_.loc[slice(t)].tail(1).values.ravel()[0])
            cif_diff = cif_c - cif_c6
            # NOTE(review): binomial standard error using the full group sizes; it does
            # not account for censoring -- confirm this approximation is intended.
            se = np.sqrt(cif_c6 * (1-cif_c6) / len(d_c6) +cif_c * (1-cif_c) / len(d_len))
            print('%s %g-year chain_class %g vs. 6: %.1f%% (%.1f%%, %.1f%%)'
                  %(status, t, n, cif_diff*100, (cif_diff -1.96 *se)*100, (cif_diff+1.96*se )*100))

                To resolve ties, data is randomly jittered.


wl 1-year chain_class 5 vs. 6: 10.4% (10.1%, 10.7%)
wl 3-year chain_class 5 vs. 6: 17.1% (16.8%, 17.5%)
wl 5-year chain_class 5 vs. 6: 18.4% (18.1%, 18.8%)
wl 1-year chain_class 2 vs. 6: -3.6% (-3.7%, -3.4%)
wl 3-year chain_class 2 vs. 6: -4.9% (-5.1%, -4.7%)
wl 5-year chain_class 2 vs. 6: -5.0% (-5.2%, -4.8%)
wl 1-year chain_class 1 vs. 6: -3.9% (-4.0%, -3.7%)
wl 3-year chain_class 1 vs. 6: -5.7% (-5.9%, -5.5%)
wl 5-year chain_class 1 vs. 6: -5.9% (-6.1%, -5.7%)
wl 1-year chain_class 3 vs. 6: -3.7% (-3.8%, -3.5%)
wl 3-year chain_class 3 vs. 6: -5.2% (-5.4%, -5.0%)
wl 5-year chain_class 3 vs. 6: -5.4% (-5.7%, -5.2%)
wl 1-year chain_class 4 vs. 6: -3.5% (-3.7%, -3.3%)
wl 3-year chain_class 4 vs. 6: -5.0% (-5.2%, -4.8%)
wl 5-year chain_class 4 vs. 6: -5.1% (-5.3%, -4.8%)
livingd 1-year chain_class 5 vs. 6: 0.9% (0.8%, 1.0%)
livingd 3-year chain_class 5 vs. 6: 2.3% (2.1%, 2.5%)
livingd 5-year chain_class 5 vs. 6: 2.7% (2.5%, 2.9%)
livingd 1-year chain_class 2 vs. 6: -0.6% (-0.6%, -0.5%)
l