# JAMA sensitivity analysis

Use profit status from USRDS 'NU_P_NP' instead from DFR/DFC to address loss of subjects due to merging

In [1]:
!pip install lifelines

Collecting lifelines
[?25l  Downloading https://files.pythonhosted.org/packages/2c/24/12f3898a716d09b1793584d38e0a37aad22d8ee7cef3c5e708b98a836882/lifelines-0.22.7-py2.py3-none-any.whl (338kB)
[K     |████████████████████████████████| 348kB 46.0MB/s 
Collecting autograd-gamma>=0.3 (from lifelines)
  Downloading https://files.pythonhosted.org/packages/3e/87/788c4bf90cc5c534cb3b7fdb5b719175e33e2658decce75e35e2ce69766f/autograd_gamma-0.4.1-py2.py3-none-any.whl
Installing collected packages: autograd-gamma, lifelines
Successfully installed autograd-gamma-0.4.1 lifelines-0.22.7


## Import packages



In [2]:
import pandas as pd, numpy as np, re
from numpy import exp, mean
from fancyimpute import IterativeImputer
from sklearn.ensemble import RandomForestClassifier
from lifelines import CoxPHFitter, AalenJohansenFitter, KaplanMeierFitter
import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## **Read data**

In [0]:
# whole cohort (N=1,675,259 26 features)
d = pd.read_csv('drive/My Drive/facility/Pat_usrds17_bef_dfcdrf_merge.csv')
d = d.loc[d['NU_P_NP'].isin(['For-profit', 'Non-profit'])].reset_index(drop=True)

#**Multiple imputation**


##**Random forest imputation for categorical**

In [0]:
pred = ['sex_new', 'age_cat', 'race_new', 'insurance_esrd']
imputer = IterativeImputer(n_iter=1, random_state=7, predictor=RandomForestClassifier(n_estimators=100))
imputed = pd.DataFrame(imputer.fit_transform(d[pred]), columns=pred)
d = d.drop('insurance_esrd', 1).join(imputed['insurance_esrd'])

#**Create cohort for Cox model**

**1) dummy code and order levels based on table**

**2) drop unneeded variables**

In [0]:
# standard cohort
PH_data = d[['PROVUSRD', 'NU_P_NP', 'sex_new', 'age_cat', 'race_new', 'esrd_cause', 'bmi_35',
                 'ashd_new', 'chf',	'other_cardiac', 'cva_new',	'pvasc_new', 'hypertension', 'diabetes', 'copd_new',
                 'smoke_new', 'cancer_new', 'insurance_esrd', 'PATTXOP_MEDUNFITn','nephcare_cat','wl', 'wl_time', 'livingd', 'ld_time', 'deceasedt', 'dec_time']]
PH_data = PH_data.join(pd.get_dummies(pd.Categorical(PH_data.insurance_esrd, [3, 2, 1, 4, 5], True), prefix='insurance_esrd', drop_first=True))
PH_data = PH_data.join(pd.get_dummies(pd.Categorical(PH_data.age_cat, [5, 1, 2, 3, 4, 6], True), prefix='age_cat', drop_first=True)) # delete category "6" for ideal cohort!
PH_data = PH_data.join(pd.get_dummies(PH_data.race_new, prefix='race_new', drop_first=True))
PH_data = PH_data.join(pd.get_dummies(PH_data.esrd_cause, prefix='esrd_cause', drop_first=True))
PH_data = PH_data.join(pd.get_dummies(pd.Categorical(PH_data.NU_P_NP, ['Non-profit', 'For-profit'], True), prefix='profit_status', drop_first=True))
PH_data = PH_data.drop(['insurance_esrd', 'esrd_cause', 'age_cat', 'race_new', 'NU_P_NP'], axis=1)

In [103]:
cph = CoxPHFitter()
for time, status in zip(['wl_time', 'ld_time', 'dec_time'], ['wl', 'livingd', 'deceasedt']):
  print('-'*30, status,'-'*30)
  for exposure in ['profit_status_For-profit']:
    crude = '|'.join([exposure, time, status])
    model1 = crude + '|sex_new|age_cat|race_new'
    model2 = model1 + 'esrd_cause|bmi_35|ashd_new|other_cardiac|hypertension|diabetes|'\
                  'copd_new|smoke_new|cancer_new|chf|cva_new|pvasc_new'
    model3 = model2 + '|insurance_esrd|PATTXOP_MEDUNFITn' #PATTXOP_MEDUNFITn'
    cph.fit(PH_data.filter(regex=model3), duration_col=time, event_col=status, step_size=0.5)
    print(round(pd.concat([cph.hazard_ratios_[cph.hazard_ratios_.index.str.contains(exposure)].rename('HR'), exp(cph.confidence_intervals_[cph.confidence_intervals_.index.str.contains(exposure)])], 1), 2))

------------------------------ wl ------------------------------
                            HR  95% lower-bound  95% upper-bound
profit_status_For-profit  0.18             0.17             0.18
------------------------------ livingd ------------------------------
                            HR  95% lower-bound  95% upper-bound
profit_status_For-profit  0.39             0.39              0.4
------------------------------ deceasedt ------------------------------
                            HR  95% lower-bound  95% upper-bound
profit_status_For-profit  0.42             0.41             0.42
