In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score, brier_score_loss, make_scorer
from sklearn.metrics import auc, roc_curve, roc_auc_score
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, sem
import scipy.stats
import os
from ehr_utils import *

In [2]:
# Root directory that holds one sub-directory of model outputs per input feature set.
output_dir = "/home/blhill/paper"
# File name that is the same inside every feature-set sub-directory.
pred_probs_f = "model_pred_probs.txt"

# DNN predictions are kept in their own file, outside output_dir.
dnn_pred_probs_f = "/home/nrakocz/EhrMain/predictions_proba.csv"

# files with predicted probability of death for each input feature set
# cols should look like: ADMSN_ID INPT_DEATH_YN PRED_DEATH_ELASTICNET PRED_DEATH_LOGREG PRED_DEATH_RF PRED_DEATH_XGB
# NOTE(review): the comparison cells select columns named "test_index" and
# "Random Forest", which are not in the list above -- confirm the real header.
onlyASA_probs_f = os.path.join(output_dir, "asa", pred_probs_f)
preopASA_probs_f = os.path.join(output_dir, "preop_asa", pred_probs_f)
predASA_probs_f = os.path.join(output_dir, "preop_imp_asa", pred_probs_f)
charlson_probs_f = os.path.join(output_dir, "charlson", pred_probs_f)
preop_probs_f = os.path.join(output_dir, "preop", pred_probs_f)

# Same feature sets, taken from the "*_no_lab_times" sub-directories.
preopASA_probs_notime_f = os.path.join(output_dir, "preop_asa_no_lab_times", pred_probs_f)
predASA_probs_notime_f = os.path.join(output_dir, "preop_imp_asa_no_lab_times", pred_probs_f)
preop_probs_notime_f = os.path.join(output_dir, "preop_no_lab_times", pred_probs_f)

In [3]:
# Load every prediction file into its own pandas data frame.
# The per-feature-set files all share one layout: tab-separated with a header row.
tsv_opts = dict(sep="\t", header=0)

onlyASA_df = pd.read_csv(onlyASA_probs_f, **tsv_opts)
preopASA_df = pd.read_csv(preopASA_probs_f, **tsv_opts)
predASA_df = pd.read_csv(predASA_probs_f, **tsv_opts)
charlson_df = pd.read_csv(charlson_probs_f, **tsv_opts)
preop_df = pd.read_csv(preop_probs_f, **tsv_opts)

# Variants read from the "*_no_lab_times" directories.
preopASA_notime_df = pd.read_csv(preopASA_probs_notime_f, **tsv_opts)
predASA_notime_df = pd.read_csv(predASA_probs_notime_f, **tsv_opts)
preop_notime_df = pd.read_csv(preop_probs_notime_f, **tsv_opts)

# The DNN prediction file is comma-separated rather than tab-separated.
dnn_df = pd.read_csv(dnn_pred_probs_f, sep=",", header=0)

In [4]:
# Path to a second ("v2") predictions file inside the "preop" feature-set directory.
preop_v2_f = os.path.join(output_dir, "preop/model_pred_probs_v2.txt")
# Read as tab-separated text with a header row.
# NOTE(review): preop_v2_df is not used by the comparison cells visible in this
# part of the notebook -- confirm it is still needed.
preop_v2_df = pd.read_csv(preop_v2_f, sep="\t", header=0)

In [5]:
# Configuration for the AUROC comparison between two models.
# Each line pairs the name of a prediction column with the data frame it is read
# from: model_name_1 is looked up in df1, model_name_2 in df2.
model_name_1, df1 = "Random Forest", predASA_df
model_name_2, df2 = "Random Forest", preopASA_df

In [6]:
# Per-fold AUROC for each model: rows are grouped by the "test_index" column
# (one group per cross-validation fold) and every group is scored on its own.
roc_auc_vec_1 = [
    roc_auc_score(fold["INPT_DEATH_YN"], fold[model_name_1])
    for fold_id, fold in df1.groupby("test_index")
]
roc_auc_vec_2 = [
    roc_auc_score(fold["INPT_DEATH_YN"], fold[model_name_2])
    for fold_id, fold in df2.groupby("test_index")
]

# print(...) with a single argument behaves the same on Python 2 and Python 3,
# unlike the bare print statement, which is a SyntaxError on Python 3.
print(roc_auc_vec_1)
print(roc_auc_vec_2)

# Mean AUROC across folds and the standard error of that mean.
A_1 = np.mean(roc_auc_vec_1)
A_2 = np.mean(roc_auc_vec_2)
# ddof=0 makes sem() equal np.std(x) / sqrt(n); scipy's default is ddof=1.
# NOTE(review): the `ci` helper in this notebook calls sem() with the default
# ddof=1 -- confirm which convention is intended for the z-test.
SE_1 = sem(roc_auc_vec_1, ddof=0)
SE_2 = sem(roc_auc_vec_2, ddof=0)
print("A_1: {} (SE_1: {})".format(A_1, SE_1))
print("A_2: {} (SE_2: {})".format(A_2, SE_2))

[0.9392901388096904, 0.9384445205568333, 0.9228757388590025, 0.9307884675433361, 0.9158663260962112]
[0.9420695005574226, 0.9422091345615041, 0.9290515090275998, 0.937730798181222, 0.924556026080527]
A_1: 0.929453038373 (SE_1: 0.00403612828003)
A_2: 0.935123393682 (SE_2: 0.00318616838249)


In [7]:
# Cross-check of the standard errors: np.std of the per-fold AUROCs divided by
# the square root of the number of folds, printed once per model.
for fold_aucs in (roc_auc_vec_1, roc_auc_vec_2):
    print(np.std(fold_aucs) / np.sqrt(len(fold_aucs)))

0.004036128280031662
0.003186168382492256


In [8]:
def ci(results, confidence=0.95):
    """Return a two-sided Student-t confidence interval for the mean of `results`.

    Parameters
    ----------
    results : sequence of float
        Sample values (here: per-fold AUROCs).
    confidence : float, optional
        Coverage of the interval, strictly between 0 and 1. Defaults to 0.95,
        which reproduces the previously hard-coded behaviour.

    Returns
    -------
    tuple of (float, float)
        Lower and upper end points of the interval.
    """
    center = np.mean(results)
    # scipy.stats.sem uses ddof=1 by default (sample standard error).
    spread = scipy.stats.sem(results)
    dof = len(results) - 1
    # The confidence level is passed positionally because its keyword name
    # differs between SciPy releases.
    return scipy.stats.t.interval(confidence, dof, loc=center, scale=spread)

# Confidence interval for the mean per-fold AUROC, printed once per model.
for fold_aucs in (roc_auc_vec_1, roc_auc_vec_2):
    print(ci(fold_aucs))

(0.9169242504293963, 0.9419818263166334)
(0.9252330172497656, 0.9450137701135444)


In [9]:
# Split each prediction table by outcome: controls are the rows where
# INPT_DEATH_YN is False, cases the rows where it is True.
controls_df_1 = df1[df1["INPT_DEATH_YN"] == False]
controls_df_2 = df2[df2["INPT_DEATH_YN"] == False]
cases_df_1 = df1[df1["INPT_DEATH_YN"] == True]
cases_df_2 = df2[df2["INPT_DEATH_YN"] == True]
# Function-call form of print so the cell also runs on Python 3.
print(controls_df_1.shape)
print(controls_df_2.shape)
print(cases_df_1.shape)
print(cases_df_2.shape)
# NOTE(review): pearsonr pairs the two inputs by position, so the correlations
# below assume df1 and df2 list the same admissions in the same row order --
# TODO confirm against the files that produced them.
# calculate correlation between predictions of controls
r_controls = pearsonr(controls_df_1[model_name_1], controls_df_2[model_name_2])[0]
# "%s" formatting prints exactly the text the old `print "label:", value` did.
print("r_controls: %s" % r_controls)
# calculate correlation between predictions of cases
r_cases = pearsonr(cases_df_1[model_name_1], cases_df_2[model_name_2])[0]
print("r_cases: %s" % r_cases)
# The average of the two correlations and the average of the two AUROCs are the
# two lookup keys for the Hanley & McNeil (1983) table.
avg_r = (r_controls + r_cases) / 2.0
print("avg r:\t\t%s" % avg_r)
print("avg AUROC:\t%s" % ((A_1 + A_2) / 2.0))
print("now use average r and average AUROC to look up r value from Hanley & McNeil (1983) paper table")

(58575, 9)
(58575, 9)
(1103, 9)
(1103, 9)
r_controls: 0.9760734870965603
r_cases: 0.9741962258156045
avg r:		0.9751348564560824
avg AUROC:	0.9322882160273349
now use average r and average AUROC to look up r value from Hanley & McNeil (1983) paper table


This is the table I extrapolated from the Hanley & McNeil (1983) paper


| average correlation between ratings  | Average Area 0.925  | Average Area 0.950 |
| :-------------: |:-------------:| :-------------:| 
| 0.90 | 0.85 | 0.84 | 
| 0.92 | 0.87 | 0.86 | 
| 0.94 | 0.90 | 0.89 |
| 0.96 | 0.92 | 0.92 | 
| 0.98 | 0.95 | 0.94 |

In [19]:
# get this value from the Hanley & McNeil (1983) paper using above values, or 
# from the table I created above if avg r > 0.90
# With the outputs recorded above (avg r ~0.975, avg AUROC ~0.932) the nearest
# table rows are 0.96 -> 0.92 and 0.98 -> 0.95 / 0.94; 0.94 lies between them.
# NOTE(review): this is a manual lookup -- redo it whenever df1, df2 or the
# model names in the configuration cell change.
r = 0.94

# $z = \frac{A_1-A_2}{\sqrt{SE_{1}^{2}+SE_{2}^{2} - 2rSE_1 SE_2}}$
where $A_1$ and $SE_1$ refer to the observed area and estimated standard error of the ROC area associated with model 1, $A_2$ and $SE_2$ are the same quantities for model 2, and $r$ is the estimated correlation between $A_1$ and $A_2$.

In [20]:
def z(A_1, A_2, SE_1, SE_2, r):
    """Return the z statistic for the difference between two correlated AUROCs.

    Computes (A_1 - A_2) / sqrt(SE_1^2 + SE_2^2 - 2 * r * SE_1 * SE_2).

    Parameters
    ----------
    A_1, A_2 : float
        Areas under the ROC curve of model 1 and model 2.
    SE_1, SE_2 : float
        Standard errors of A_1 and A_2.
    r : float
        Correlation between the two areas.

    Returns
    -------
    float
        The difference in areas divided by the standard error of that difference.
    """
    # Variance of (A_1 - A_2) for two correlated estimates.
    diff_variance = SE_1**2 + SE_2**2 - 2*r*SE_1*SE_2
    return (A_1 - A_2) / np.sqrt(diff_variance)

In [21]:
# z statistic for the difference between the two AUROCs, using the correlation r
# looked up from the Hanley & McNeil (1983) table.
z_value = z(A_1, A_2, SE_1, SE_2, r)
# Two-sided test: twice the upper-tail probability of |z| under a standard normal.
p_value = scipy.stats.norm.sf(abs(z_value)) * 2
# Function-call form of print runs on both Python 2 and Python 3 with the same output.
print("z: {} (p={})".format(z_value, p_value))

z: -3.76719486234 (p=0.000165092138404)
