# Appendix A

This notebook contains the code for Appendix A of the paper: it evaluates the optimism of the performance measures by holding out a validation set and calculating the performance measures on that held-out set.

In [1]:
from __future__ import print_function

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import os, sys
import subprocess

# hack to append the parent path to the system path
# this allows us to import sepsis_utils
sys.path.append(os.path.dirname(os.path.abspath('.')))

from sepsis_utils import sepsis_utils as su
from sepsis_utils import roc_utils as ru

# default colours for prettier plots: one three-component colour
# (presumably RGB, each component in [0, 1]) per plotted series
col = [[0.9047, 0.1918, 0.1988],
    [0.2941, 0.5447, 0.7494],
    [0.3718, 0.7176, 0.3612],
    [1.0000, 0.5482, 0.1000],
    [0.4550, 0.4946, 0.4722],
    [0.6859, 0.4035, 0.2412],
    [0.9718, 0.5553, 0.7741],
    [0.5313, 0.3359, 0.6523]]
# marker symbol for each series
# NOTE(review): only seven markers are listed for eight colours/line styles -
# confirm whether an eighth entry is needed
marker = ['v','o','d','^','s','o','+']
# line style for each series
# NOTE(review): the sixth entry used to be 's', which is a marker code and not
# a valid Matplotlib line style; '--' is assumed from the neighbouring entries -
# confirm the intended style
ls = ['-','-','-','-','-','--','--','--']
%matplotlib inline

First, load the data and split the data into a development and a validation set.

In [2]:
# load data
df = pd.read_csv('../sepsis3-df.csv', sep=',')

# split into development/validation sets: a seeded 50% sample drawn without
# replacement is the development set, all remaining rows are the validation set
df_dev = df.sample(frac=0.5, replace=False, weights=None, random_state=12875, axis=0)
df_val = df.drop(df_dev.index)

# the two sets must partition the full data set
assert df_dev.shape[0] + df_val.shape[0] == df.shape[0]

# write to file the design matrix for the MFP model here - these files are used by the R code
X_header = ['age','elixhauser_hospital','hospital_expire_flag','angus',
            'is_male','race_black','race_other',
            'qsofa','sofa','sepsis3','sirs','lods','mlods']

# The files go in the parent directory (next to sepsis3-df.csv) because the
# model-fitting cell hands the R script the prefix "../sepsis3-design-matrix";
# writing them to the current directory would leave the R script reading
# missing or stale files.
design_matrix_prefix = '../sepsis3-design-matrix'
for suffix, df_part in [('-dev.csv', df_dev), ('-val.csv', df_val)]:
    np.savetxt(design_matrix_prefix + suffix, df_part[X_header].values, fmt='%4.4f',
               delimiter=',', header=','.join(X_header), comments='')

Next, we train the baseline model (a multivariable fractional polynomial (MFP) model using only age/gender/comorbidity/race) on the development set and evaluate it on the validation set.

In [3]:
target_header = 'hospital_expire_flag'


def run_rscript(script, fn_in, fn_out, formula):
    """Run an R model-fitting script and return its exit status.

    The command is passed as an argument list (no shell), so the formula
    reaches R as a single argument without any manual quoting.

    Parameters
    ----------
    script : str
        Path to the R script to execute with ``Rscript``.
    fn_in : str
        Input file name (or file-name prefix) handed to the script.
    fn_out : str
        Output file name (or file-name prefix) handed to the script.
    formula : str
        Model formula handed to the script.

    Returns
    -------
    int
        Exit status of the ``Rscript`` process (0 on success).
    """
    return subprocess.call(['Rscript', script, fn_in, fn_out, formula])


def read_first_column(fn):
    """Read a CSV file with a header row and return its first column as an array."""
    return pd.read_csv(fn, sep=',', header=0).values[:, 0]


# baseline model
fn_in = "../sepsis3-design-matrix" # the R script adds "-dev.csv" and "-val.csv"
fn_out = "../sepsis3-preds"

# train a baseline model with only four covariates (age, comorbidity, sex and
# race - race enters the formula as two indicator columns)
formula = target_header + " ~ fp(age) + fp(elixhauser_hospital) + is_male + race_black + race_other"
err = run_rscript("../r-make-sepsis3-models-dev-val.R", fn_in, fn_out, formula)
if err != 0:
    # Stop here: everything below needs the files written by the R script, and
    # continuing would either fail with a NameError or silently reuse stale
    # variables left in the kernel by an earlier run.
    raise RuntimeError('RScript returned error status {}.'.format(err))

# load in the predictions output by the R script
pred_dev = read_first_column(fn_out + '-dev.csv')
pred_val = read_first_column(fn_out + '-val.csv')

# read in the targets
tar_dev = read_first_column(fn_out + '-dev-tar.csv')
tar_val = read_first_column(fn_out + '-val-tar.csv')

# print out the baseline AUROC of a model without severity of illness
# in Seymour2016, this was 0.58 (validation)
auc_mfp_dev, ci_mfp_dev = ru.calc_auc(pred_dev, tar_dev, with_ci=True, alpha=0.05)
auc_mfp_val, ci_mfp_val = ru.calc_auc(pred_val, tar_val, with_ci=True, alpha=0.05)

print('Performance of an MFP model without severity of illness predicting {}:'.format(target_header))
print('DEV (n = {:4g}): {:0.3f} [{:0.3f}, {:0.3f}]'.format(
        df_dev.shape[0], auc_mfp_dev, ci_mfp_dev[0], ci_mfp_dev[1]))
print('VAL (n = {:4g}): {:0.3f} [{:0.3f}, {:0.3f}]'.format(
        df_val.shape[0], auc_mfp_val, ci_mfp_val[0], ci_mfp_val[1]))

# retrain the model on the full data to compare
fn_in = "../sepsis3-design-matrix.csv"
fn_out = "../sepsis3-preds.csv"
err = run_rscript("../r-make-sepsis3-models.R", fn_in, fn_out, formula)
if err != 0:
    # the full-data fit is only a comparison, so report the failure and move on
    print('RScript returned error status {}.'.format(err))
else:
    preds_mfp_b = read_first_column(fn_out)
    # NOTE(review): assumes the predictions are in the same row order as df - confirm
    auc_mfp_b, ci_mfp_b = ru.calc_auc(preds_mfp_b, df[target_header].values,
                                      with_ci=True,
                                      alpha=0.05)
    print('ALL (n = {:4g}): {:0.3f} [{:0.3f}, {:0.3f}] (trained/evaluated on all the data)'.format(
        df.shape[0], auc_mfp_b, ci_mfp_b[0], ci_mfp_b[1]))

Performance of an MFP model without severity of illness predicting hospital_expire_flag:
DEV (n = 3391): 0.703 [0.678, 0.727]
VAL (n = 3390): 0.691 [0.666, 0.716]
ALL (n = 6781): 0.697 [0.679, 0.714] (trained/evaluated on all the data)
