In [None]:
"""
File: CompareAUROC-Poster.ipynb
---------------------------------
Compare the performance of XGBoost, BigBird & Bi-LSTM using the ROC curves and AUROC scores of all three models on the same test set.
Used to generate the ROC curves on the poster showcased at the Vector Institute's Research Symposium on February 9.
"""

In [None]:
# Import dependencies and define useful constants
import os

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import expit
from sklearn.metrics import (
    roc_auc_score,
    roc_curve,
)


plt.style.use("seaborn-v0_8")
%matplotlib inline

TEST_SIZE = "512"
TEST_GROUP = "two_weeks"
TRANSFORMER_TEST_GROUP = "week" if TEST_GROUP == "two_weeks" else "month"

ROOT = "/fs01/home/afallah/odyssey/odyssey"
DATA_ROOT = f"{ROOT}/data/slurm_data/{TEST_SIZE}/{TEST_GROUP}"
os.chdir(ROOT)

In [None]:
# Read each model's saved test-set outputs from disk: predictions, labels, and scores.

# XGBoost -- the three arrays share one path stem; only column 1 of the saved
# probability array is kept (presumably the positive class -- TODO confirm).
xgboost_stem = f"{ROOT}/xgboost_y_test_pred_{TEST_GROUP}"
y_xgboost_pred = np.load(xgboost_stem + ".npy")
y_xgboost_labels = np.load(xgboost_stem + "_labels.npy")
y_xgboost_prob = np.load(xgboost_stem + "_prob.npy")[:, 1]

# Bi-LSTM -- same file layout as XGBoost, but the probability array is used as stored.
lstm_stem = f"{ROOT}/lstm_y_test_pred_{TEST_GROUP}"
y_lstm_pred = np.load(lstm_stem + ".npy")
y_lstm_labels = np.load(lstm_stem + "_labels.npy")
y_lstm_prob = np.load(lstm_stem + "_prob.npy")

# Transformer -- results live in a shared project directory and use the
# TRANSFORMER_TEST_GROUP tag in their file names.
transformer_results_dir = "/ssd003/projects/aieng/public/odyssey/results"
y_transformer_pred = np.load(f"{transformer_results_dir}/test_preds_{TRANSFORMER_TEST_GROUP}.npy")
y_transformer_labels = np.load(f"{transformer_results_dir}/test_labels_{TRANSFORMER_TEST_GROUP}.npy")
# Column 1 is squashed through the logistic sigmoid. expit is monotonic, so it rescales
# the scores without changing their ordering.
# NOTE(review): this treats column 1 as a standalone logit; if the file holds two-class
# logits, confirm whether a softmax over both columns was intended instead.
y_transformer_prob = expit(
    np.load(f"{transformer_results_dir}/test_prob_{TRANSFORMER_TEST_GROUP}.npy")[:, 1]
)

In [None]:
# Compute, per model, the ROC curve points (false/true positive rates) and the AUROC
# from its labels and scores. The thresholds returned by roc_curve are not used.

# XGBoost
fpr_xgboost, tpr_xgboost, _ = roc_curve(
    y_true=y_xgboost_labels,
    y_score=y_xgboost_prob,
)
y_xgboost_auroc = roc_auc_score(
    y_true=y_xgboost_labels,
    y_score=y_xgboost_prob,
)

# Bi-LSTM
fpr_lstm, tpr_lstm, _ = roc_curve(
    y_true=y_lstm_labels,
    y_score=y_lstm_prob,
)
y_lstm_auroc = roc_auc_score(
    y_true=y_lstm_labels,
    y_score=y_lstm_prob,
)

# Transformer
fpr_transformer, tpr_transformer, _ = roc_curve(
    y_true=y_transformer_labels,
    y_score=y_transformer_prob,
)
transformer_auroc = roc_auc_score(
    y_true=y_transformer_labels,
    y_score=y_transformer_prob,
)

In [None]:
# Draw the ROC curves of the three models on a single figure, with each model's AUROC
# (rounded to two decimals) shown in the legend.
fig, ax = plt.subplots(figsize=(8, 10))

ax.plot(fpr_transformer, tpr_transformer, label=f"BigBird = {transformer_auroc:.2f}", color="red")
ax.plot(fpr_xgboost, tpr_xgboost, label=f"XGBoost = {y_xgboost_auroc:.2f}", color="green")
ax.plot(fpr_lstm, tpr_lstm, label=f"Bi-LSTM = {y_lstm_auroc:.2f}", color="blue")
# Diagonal reference line: the ROC curve of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Random")

# Build the title from TEST_GROUP so it cannot go stale when the evaluation group is
# changed in the configuration cell ("two_weeks" -> "Two-Weeks").
prediction_window = TEST_GROUP.replace("_", "-").title()

ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title(f"ROC Curve - {prediction_window} Mortality Prediction")
ax.legend(loc="lower right", fontsize="large", facecolor="white", frameon=True)
plt.show()