In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, average_precision_score
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import seaborn as sns; sns.set()

import os 

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/home/ccorbin/.config/gcloud/application_default_credentials.json' 
os.environ['GCLOUD_PROJECT'] = 'mining-clinical-decisions' 
%load_ext google.cloud.bigquery

from google.cloud import bigquery
client=bigquery.Client()

%matplotlib inline

### Load in MultiLabel Cohort

In [None]:
# Run the cohort query through the BigQuery client and load the result set
# into a DataFrame.
q_cohort = "select * from traige_TE.triage_cohort_adjusted_multilabel"
query_job = client.query(q_cohort)
cohort_rows = query_job.result()
df_cohort = cohort_rows.to_dataframe()

In [None]:
df_cohort.head()

In [None]:
df_cohort[['label_24hr', 'label_12hr', 'acute_to_critical_24hr', 'acute_to_critical_12hr']].sum()

### Load in Predictions From LGBM

In [None]:
# Prediction files for the two models compared in this notebook.
full_preds_path = '/home/ccorbin/triage_models/lightgbm/test_yhats.csv'
simple_preds_path = '/home/ccorbin/BMI212/notebooks/results_gmb.csv'

df_preds_full = pd.read_csv(full_preds_path)
df_preds_simple = pd.read_csv(simple_preds_path)

In [None]:
print(df_preds_full.shape)
print(df_preds_simple.shape)

In [None]:
# Give the simple model's score column the same name the full model uses.
df_preds_simple = df_preds_simple.rename({'preds_gbm': 'predictions'}, axis='columns')

In [None]:
# Keep only the encounter id, the label and the model score in both frames.
prediction_columns = ['pat_enc_csn_id_coded', 'label', 'predictions']
df_preds_full = df_preds_full[prediction_columns]
df_preds_simple = df_preds_simple[prediction_columns]

In [None]:
df_preds_full.head()

In [None]:
# Attach the cohort columns to every prediction row. validate='many_to_one'
# makes pandas raise if an encounter id appears more than once in df_cohort;
# without it a left merge would silently duplicate prediction rows and skew
# every metric computed below.
df_preds_full = pd.merge(
    df_preds_full, df_cohort,
    on='pat_enc_csn_id_coded', how='left', validate='many_to_one',
)
df_preds_simple = pd.merge(
    df_preds_simple, df_cohort,
    on='pat_enc_csn_id_coded', how='left', validate='many_to_one',
)

In [None]:
df_preds_full.head()

# for i in range(len(df_preds)):
#     assert df_preds['label'].values[i] == df_preds['label_24hr'].values[i]

### Get list of patient ids that were used for training

In [None]:
# Patients with a cohort row admitted before 2017-07-01 are counted as training
# patients (assumes the train/test split was made at that date -- confirm).
training_jc_uids = set(df_cohort[df_cohort['admit_time'] < '2017-07-01']['jc_uid'].values)
test_jc_uids = set(df_preds_full['jc_uid'].values)
seen_jc_uids = training_jc_uids.intersection(test_jc_uids)

# Keep only prediction rows whose patient never appears in the training set.
df_preds_unseen = df_preds_full[~df_preds_full['jc_uid'].isin(seen_jc_uids)]

# Sanity check, done in one vectorised pass instead of a Python loop over rows.
assert not df_preds_unseen['jc_uid'].isin(training_jc_uids).any(), \
    'df_preds_unseen still contains patients from the training period'

In [None]:
import pdb

def plot_perf(labels, predictions, title):
    """Plot the ROC curve and the precision-recall curve side by side.

    Parameters
    ----------
    labels : array-like
        Ground-truth labels, forwarded unchanged to the sklearn metric
        functions (presumably binary 0/1 -- confirm against the caller).
    predictions : array-like
        Model scores aligned with ``labels``.
    title : str
        Title drawn on both panels.

    Returns
    -------
    None
        A new 16x8 figure is created; nothing is returned.

    Notes
    -----
    Calls ``sns.set`` and ``sns.set_style``, so the seaborn font scale and
    style remain changed for figures drawn afterwards.
    """
    sns.set(font_scale=2.)
    sns.set_style("whitegrid")
    fig1 = plt.figure(figsize=(16, 8))

    # Left panel: ROC curve.
    ax1 = fig1.add_subplot(1, 2, 1)

    # roc_curve returns (fpr, tpr, thresholds), in that order.
    fpr, tpr, _ = roc_curve(labels, predictions)
    auroc = roc_auc_score(labels, predictions)

    ax1.plot(fpr, tpr, sns.xkcd_rgb["denim blue"], lw=2.5, label='AUROC %.2f' % auroc)
    # Diagonal reference line from (0, 0) to (1, 1).
    ax1.plot([0, 1],
             [0, 1],
             sns.xkcd_rgb["slate grey"],
             lw=1.5, linestyle='--')

    ax1.set_ylabel('Sensitivity')
    ax1.set_xlabel('1 - Specificity')
    ax1.set_title(title)
    ax1.legend()

    # Right panel: precision-recall curve.
    ax2 = fig1.add_subplot(1, 2, 2)

    precision, recall, _ = precision_recall_curve(labels, predictions)
    auprc = average_precision_score(labels, predictions)

    ax2.plot(recall, precision, sns.xkcd_rgb["denim blue"], lw=2.5, label='AUPRC %.2f' % auprc)

    ax2.set_ylabel('Precision')
    ax2.set_xlabel('Recall')
    ax2.set_title(title)
    ax2.legend()
    

# ROC / PR curves over every row of df_preds_full.
plot_perf(
    labels=df_preds_full['label'],
    predictions=df_preds_full['predictions'],
    title='All Validation Examples',
)


In [None]:
# ROC / PR curves over every row of df_preds_simple.
plot_perf(
    labels=df_preds_simple['label'],
    predictions=df_preds_simple['predictions'],
    title='All Test Examples Simple Model',
)


In [None]:
# Precision/recall operating points and the matching probability thresholds
# computed from df_preds_full. `recall` is read as a notebook-level name by
# get_index_at_recall below.
precision, recall, p_thresh = precision_recall_curve(df_preds_full['label'], df_preds_full['predictions'])
def get_index_at_recall(val, recalls=None):
    """Return the index just before recall first drops below ``val``.

    Parameters
    ----------
    val : float
        Target recall.
    recalls : sequence of float, optional
        Recall values in curve order. Defaults to the notebook-level
        ``recall`` array, which is what the original version always used.

    Returns
    -------
    int or None
        ``i - 1`` for the first position ``i`` whose recall is below ``val``.
        ``None`` when no recall is below ``val``, or when the very first
        recall is already below it (no earlier index exists; previously this
        returned -1, which silently indexed the last element).
    """
    if recalls is None:
        recalls = recall  # fall back to the notebook-level PR-curve recalls
    for i, rec in enumerate(recalls):
        if rec < val:
            return i - 1 if i > 0 else None
    return None

# Probability threshold at the last curve index reached before recall falls below 0.8.
print(p_thresh[get_index_at_recall(0.8)])

### Plot Performance For Patients that Were Not Seen In Training

In [None]:
# Same curves, restricted to the rows kept in df_preds_unseen.
plot_perf(
    labels=df_preds_unseen['label'],
    predictions=df_preds_unseen['predictions'],
    title='New Patients Only',
)


### Plot Performance Where the Label Is 0 for Acute Care Only and 1 for Patients Moved to the ICU Within 24 Hours

In [None]:
# `df_preds` is never defined in this notebook; the merged full-model frame is
# df_preds_full. Keep rows that are either negatives (label == 0) or were moved
# from acute to critical care within 24 hours.
df_preds_move = df_preds_full[
    (df_preds_full['label'] == 0) | (df_preds_full['acute_to_critical_24hr'] == 1)
]

In [None]:
df_preds_move.head()

In [None]:
# Curves on the subset built in df_preds_move.
plot_perf(
    labels=df_preds_move['label'],
    predictions=df_preds_move['predictions'],
    title='Under Triaged Label',
)


In [None]:
# NOTE(review): 74 is a hard-coded count -- presumably the number of positive
# (acute-to-critical) rows in df_preds_move; confirm and derive it from the frame.
74/len(df_preds_move)

### Sensitivity and Specificity of the Doctors' Initial Decision, Assuming the 24-Hour Label Is the Correct One

In [None]:
from sklearn.metrics import classification_report

In [None]:
# `df_preds` is never defined in this notebook; use the merged full-model frame.
# numeric_only=True skips id/text/date columns, whose sums are meaningless.
df_preds_full.sum(numeric_only=True)

In [None]:
# `df_preds` is never defined in this notebook; use the merged full-model frame.
# The flag is 1 when label_24hr is truthy and acute_to_critical_24hr is not,
# otherwise 0 -- the same rule as the former row-wise apply, but vectorised.
# The column name keeps its original spelling because a later cell reads it.
df_preds_full['init_assignmet'] = (
    df_preds_full['label_24hr'].astype(bool)
    & ~df_preds_full['acute_to_critical_24hr'].astype(bool)
).astype(int)

In [None]:
# `df_preds` is never defined in this notebook; use the merged full-model frame.
# numeric_only=True skips id/text/date columns, whose sums are meaningless.
df_preds_full.sum(numeric_only=True)

In [None]:
# `df_preds` is never defined in this notebook; use the merged full-model frame.
# Scores the init_assignmet flag against label_24hr as the reference label.
print(classification_report(df_preds_full['label_24hr'], df_preds_full['init_assignmet']))

In [None]:
def plot_perf_with_marker(labels, predictions, title,
                          marker_sensitivity=0.85, marker_fpr=0.0,
                          marker_precision=1.0, marker_label='ER Doc Decision'):
    """Plot ROC and precision-recall curves with one reference operating point.

    Parameters
    ----------
    labels : array-like
        Ground-truth labels, forwarded unchanged to the sklearn metric
        functions (presumably binary 0/1 -- confirm against the caller).
    predictions : array-like
        Model scores aligned with ``labels``.
    title : str
        Title drawn on both panels.
    marker_sensitivity : float, optional
        Sensitivity (recall) of the reference point. Default 0.85.
    marker_fpr : float, optional
        False-positive rate (1 - specificity) of the reference point.
        Default 0.0.
    marker_precision : float, optional
        Precision of the reference point. Default 1.0.
    marker_label : str, optional
        Legend text for the reference point.

    Returns
    -------
    None
        A new 16x8 figure is created; nothing is returned.

    Notes
    -----
    The defaults reproduce the coordinates that used to be hard-coded here
    (NOTE(review): presumably read off the classification report for the
    doctors' initial assignment -- confirm). Calls ``sns.set`` and
    ``sns.set_style``, so the seaborn font scale and style remain changed for
    figures drawn afterwards.
    """
    sns.set(font_scale=2.)
    sns.set_style("whitegrid")
    fig1 = plt.figure(figsize=(16, 8))

    # Left panel: ROC curve.
    ax1 = fig1.add_subplot(1, 2, 1)

    # roc_curve returns (fpr, tpr, thresholds), in that order.
    fpr, tpr, _ = roc_curve(labels, predictions)
    auroc = roc_auc_score(labels, predictions)

    ax1.plot(fpr, tpr, sns.xkcd_rgb["denim blue"], lw=2.5, label='AUROC %.2f' % auroc)
    # Diagonal reference line from (0, 0) to (1, 1).
    ax1.plot([0, 1],
             [0, 1],
             sns.xkcd_rgb["slate grey"],
             lw=1.5, linestyle='--')

    # Reference point in ROC space: x = false-positive rate, y = sensitivity.
    ax1.scatter(np.array([marker_fpr]), np.array([marker_sensitivity]),
                marker='x', s=200, color=sns.xkcd_rgb['light orange'],
                label=marker_label)
    ax1.set_ylabel('Sensitivity')
    ax1.set_xlabel('1 - Specificity')
    ax1.set_title(title)
    ax1.legend()

    # Right panel: precision-recall curve.
    ax2 = fig1.add_subplot(1, 2, 2)

    precision, recall, _ = precision_recall_curve(labels, predictions)
    auprc = average_precision_score(labels, predictions)
    # Reference point in PR space: x = recall (sensitivity), y = precision.
    ax2.scatter(np.array([marker_sensitivity]), np.array([marker_precision]),
                marker='x', s=200, color=sns.xkcd_rgb['light orange'],
                label=marker_label)

    ax2.plot(recall, precision, sns.xkcd_rgb["denim blue"], lw=2.5, label='AUPRC %.2f' % auprc)

    ax2.set_ylabel('Precision')
    ax2.set_xlabel('Recall')
    ax2.set_title(title)
    ax2.legend()
    

# `df_preds` is never defined in this notebook; use the merged full-model frame.
plot_perf_with_marker(df_preds_full['label'], df_preds_full['predictions'], 'All Validation Examples')
