# Sepsis-3 evaluation in the MIMIC-III database

This notebook goes over the evaluation of the new Sepsis-3 guidelines in the MIMIC database. The goals of this analysis include:

1. Evaluating the Sepsis-3 guidelines in MIMIC using the same methodology as in the research paper
2. Evaluating the Sepsis-3 guidelines against the Angus criteria
3. Assessing if there are interesting subgroup(s) which are missed by the criteria

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import logit


import sys
import os
# the following line adds the parent folder to the path
# it lets us import the sepsis_utils package
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from sepsis_utils import sepsis_utils as su
from sepsis_utils import roc_utils as ru

from sklearn.pipeline import Pipeline

# used for train/test splits
from sklearn.cross_validation import train_test_split

# used to impute mean for data
from sklearn.preprocessing import Imputer

# normalize the data
from sklearn import preprocessing

# logistic regression is our model of choice
from sklearn.linear_model import LogisticRegression

# used to create confusion matrix
from sklearn.metrics import confusion_matrix

from sklearn.cross_validation import cross_val_score

# used to calculate AUROC/accuracy
from sklearn import metrics

# for calibration curve of severity scores
from sklearn.calibration import calibration_curve

# default colours for prettier plots (one RGB triplet per series)
col = [[0.9047, 0.1918, 0.1988],
    [0.2941, 0.5447, 0.7494],
    [0.3718, 0.7176, 0.3612],
    [1.0000, 0.5482, 0.1000],
    [0.4550, 0.4946, 0.4722],
    [0.6859, 0.4035, 0.2412],
    [0.9718, 0.5553, 0.7741],
    [0.5313, 0.3359, 0.6523]]
# marker codes for the series
# NOTE(review): 7 markers versus 8 colours/line styles - confirm whether an entry is missing
marker = ['v','o','d','^','s','o','+']
# line styles, one per colour. The sixth entry was previously 's', which is a
# marker code and not a valid Matplotlib line style; it is now a solid line.
# NOTE(review): '-' is a best guess at the intended style - confirm.
ls = ['-','-','-','-','-','-','--','--']
%matplotlib inline

from __future__ import print_function

In [None]:
# Pull the cohort through the project helper; the clause below is handed to
# su.get_data as its `exclusions` argument.
excl = ('adult = 1'
        ' and '
        'icustay_num = 1')
df = su.get_data(exclusions=excl)

# Outcome: boolean array that is True wherever the target column equals 1.
target_header = "angus"
y = (df[target_header] == 1).values

# Covariates added to the MFP model (used for the table of AUROCs).
preds_header = [
    'sirs',
    'sofa',
    'lods',
    'qsofa',
]

# Study questions

1. How well do the guidelines detect sepsis (Angus criteria) in the antibiotics/culture subset?
2. How well do the guidelines predict mortality (in-hospital) in the antibiotics/culture subset?
3. What factors would improve the sensitivity of the guidelines?
4. What factors would improve the specificity of the guidelines?

## Angus criteria evaluation

In [None]:
# SEPSIS-3 guideline "prediction": the guidelines call for suspicion of
# infection, qSOFA >= 2 and SOFA >= 2. Only the two score thresholds appear in
# this expression.
# NOTE(review): presumably suspicion of infection is handled upstream - confirm.
yhat = np.logical_and(df.qsofa.values >= 2,
                      df.sofa.values >= 2)

# report how the rule compares with the outcome vector y
print('\n SEPSIS-3 guidelines for Angus criteria sepsis \n')
print('Accuracy = {}'.format(
    metrics.accuracy_score(y, yhat)
))

su.print_cm(y, yhat) # confusion matrix of outcome vs. rule

We need to call an R script to run the fractional polynomial model on the data. All this is done in subfunctions, which evaluate:

* `print_auc_table` - the severity scores on their own
* `print_auc_table_baseline` - the severity scores in a vanilla regression
* `print_auc_table_given_preds` - the AUROC of the given predictions (which are generated using an MFP)

In [None]:
# print the AUROC tables for the scores in preds_header against the target:
#  first the severity scores on their own, then the scores in a vanilla regression
su.print_auc_table(df, preds_header, target_header)
su.print_auc_table_baseline(df, preds_header, target_header)

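TODO: implement the fractional polynomial (MFP) model. The draft below, which calls the R script, is kept for reference and is not executed.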

```python
# call a subprocess to run the R script to generate fractional polynomial predictions
import subprocess
fn_in = "sepsis3-design-matrix.csv"
fn_out = "sepsis3-preds.csv"
rcmd = ["Rscript r-make-sepsis3-models.R", fn_in, fn_out, target_header]
err = subprocess.call(' '.join(rcmd), shell=True)
if err!=0:
    print('RScript returned error status {}.'.format(err))
else:
    # load in the predictions
    pred_baseline = pd.read_csv(fn_out, sep=',', header=0)
    
# loop through each severity score, build an MFP model for each
fn_in = "sepsis3-design-matrix.csv"
fn_out = "sepsis3-preds.csv"
preds_mfp = dict()
for p in preds_header:
    rcmd = ["Rscript r-make-sepsis3-models.R", fn_in, fn_out, target_header, p] # note 4th argument is covariate 'p'
    err = subprocess.call(' '.join(rcmd), shell=True)
    if err!=0:
        print('RScript returned error status {}.'.format(err))
    else:
        # load in the predictions
        pred = pd.read_csv(fn_out, sep=',', header=0)
        preds_mfp[p] = pred.values[:,0]
        

su.print_auc_table_given_preds(preds_mfp, y, preds_header=preds_header) # optional argument fixes order of output
```

In [None]:
# ---- ROC curve and AUC of each score against the outcome y ----

# qSOFA
fpr_qsofa, tpr_qsofa, thresholds_qsofa = metrics.roc_curve(y, df.qsofa.values)
auc_qsofa = metrics.auc(fpr_qsofa, tpr_qsofa)

# SOFA
fpr_sofa, tpr_sofa, thresholds_sofa = metrics.roc_curve(y, df.sofa.values)
auc_sofa = metrics.auc(fpr_sofa, tpr_sofa)

# SEPSIS-3 rule (qSOFA >= 2 and SOFA >= 2), scored as a binary predictor
fpr_s3, tpr_s3, thresholds_s3 = metrics.roc_curve(
    y,
    (df.qsofa.values >= 2) & (df.sofa.values >= 2),
)
auc_s3 = metrics.auc(fpr_s3, tpr_s3)

# SIRS
fpr_sirs, tpr_sirs, thresholds_sirs = metrics.roc_curve(y, df.sirs.values)
auc_sirs = metrics.auc(fpr_sirs, tpr_sirs)

# LODS
fpr_lods, tpr_lods, thresholds_lods = metrics.roc_curve(y, df.lods.values)
auc_lods = metrics.auc(fpr_lods, tpr_lods)

# ---- draw the curves ----
plt.figure(figsize=[9,9])

# (false positive rate, true positive rate, line format, legend label);
# the position in this list selects the colour from `col`
_roc_series = [
    (fpr_qsofa, tpr_qsofa, 'o:', 'qSOFA (AUC = %0.2f)' % auc_qsofa),
    (fpr_sofa, tpr_sofa, '^-', 'SOFA (AUC = %0.2f)' % auc_sofa),
    (fpr_sirs, tpr_sirs, 's-', 'SIRS (AUC = %0.2f)' % auc_sirs),
    (fpr_lods, tpr_lods, 'd-', 'LODS (AUC = %0.2f)' % auc_lods),
]
for _idx, (_fpr, _tpr, _fmt, _label) in enumerate(_roc_series):
    plt.plot(_fpr, _tpr, _fmt,
             color=col[_idx], linewidth=2, markersize=10,
             label=_label)

# the SEPSIS-3 (qSOFA/SOFA combination) curve is computed above but not drawn:
#plt.plot(fpr_s3, tpr_s3, 'd--',
#         color=col[3], linewidth=2, markersize=10,
#         label='SEPSIS-3 (AUC = %0.2f)' % auc_s3)

plt.legend(loc="lower right")

# chance diagonal; it has no label, so it does not appear in the legend
plt.plot([0,1], [0,1], '--',
         color=[0,0,0], linewidth=2)

# axis limits, labels and title
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('ROC against ' + target_header, fontsize=14)
plt.show()

In [None]:
# Binary decision rules to compare: each severity score thresholded at >= 2,
# plus the sepsis3 column taken as-is from the dataframe.
yhat_names = ['qsofa','sofa','seps3','SIRS', 'LODS']
yhat_all = [
    df['qsofa'].values >= 2,
    df['sofa'].values >= 2,
    df['sepsis3'].values,
    df['sirs'].values >= 2,
    df['lods'].values >= 2,
]

# every rule is evaluated against the same target vector y (Angus criteria)
y_all = [y] * len(yhat_all)

stats_all = su.print_op_stats(
    yhat_all,
    y_all,
    yhat_names=yhat_names,
    header=target_header,
)

## Histograms comparing qSOFA

In [None]:
# histogram of the qSOFA values in septic/non-septic population
# NOTE: despite the *_alive/*_dead names, the two groups are split on the
# outcome vector y (target_header), not on survival.
qsofa_alive = df.qsofa.values[~y]
qsofa_dead = df.qsofa.values[y]

# unit-width bin edges, i.e. bins centred on 0, 1, 2 and 3
xi = [-0.5,0.5,1.5,2.5,3.5]

prevalence = np.mean(y)

# figure 1: distribution of qSOFA normalised within each outcome group.
# `density=True` replaces the `normed=True` keyword, which has been removed
# from Matplotlib; with unit-width bins the heights are within-group proportions.
plt.figure(figsize=[9,9])
n0, bins0, patches0 = plt.hist(qsofa_alive, bins=xi, density=True, color=col[0], alpha=0.5,
         label='qSOFA - ' + target_header + '=0')
n1, bins1, patches1 = plt.hist(qsofa_dead, bins=xi, density=True, color=col[1], alpha=0.5,
         label='qSOFA - ' + target_header + '=1')

plt.legend(loc="upper right")

# reformat the plot
plt.xlim([-0.5,4.5])
#plt.ylim([-0.05, 1.05])
plt.xlabel('qSOFA',fontsize=14)
plt.ylabel('Proportion - normalized within groups',fontsize=14)

# figure 2: the same bins rescaled to a percentage of ALL patients
# (within-group proportion * group size / total number of patients * 100)
plt.figure(figsize=[9,9])

N = len(y)
# bins0/bins1 hold the LEFT edge of each bin, so the bars must be edge-aligned;
# Matplotlib >= 2.0 centres bars by default, which would shift them by half a unit.
plt.bar(bins0[0:-1], 100.0*n0/N*qsofa_alive.shape[0], width=1, align='edge', color=col[0], alpha=0.5,
         label='qSOFA - ' + target_header + '=0')
plt.bar(bins1[0:-1], 100.0*n1/N*qsofa_dead.shape[0], width=1, align='edge', color=col[1], alpha=0.5,
         label='qSOFA - ' + target_header + '=1')
plt.legend()
plt.xlabel('qSOFA')
plt.ylabel('Percent of TOTAL patients')
plt.show()