## Model Evaluation - Balanced

In [None]:
import os
import pickle
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from joblib import load
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, RocCurveDisplay, ConfusionMatrixDisplay
from tensorflow.keras.models import load_model

import xgboost as xgb
from xgboost import DMatrix

Load the test sets

In [None]:
# Locations of the held-out test split (all under the user's ~/scratch directory).

# test labels
test_labels_file = (
    "~/scratch/datasets/yale_new_haven/training_test_sets/full_dataset/labels/full_dataset_test_labels.csv"
)
# features used by XGBoost
test_feats_xgb_file = (
    "~/scratch/datasets/yale_new_haven/training_test_sets/full_dataset/features/normalized_preprocessing/xgboost/yale_new_haven_test_features_xgb.csv"
)
# features used by the logistic regression and the neural network
test_feats_file = (
    "~/scratch/datasets/yale_new_haven/training_test_sets/full_dataset/features/normalized_preprocessing/regression_nn/yale_new_haven_test_features.csv"
)

In [None]:
# Read the two feature tables and the label table, in that order.
X_test, X_test_xgb, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (test_feats_file, test_feats_xgb_file, test_labels_file)
)

In [None]:
# Keep the ID column (cast to int32) before it is removed from the feature table;
# it is matched against the demographic index lists further down.
test_ids = X_test.loc[:, 'ID'].astype("int32")

In [None]:
# Keep every column except 'ID' so that only model inputs remain in both feature tables.
X_test = X_test.loc[:, X_test.columns != 'ID']
X_test_xgb = X_test_xgb.loc[:, X_test_xgb.columns != 'ID']

In [None]:
# Bundle the XGBoost test features with their labels into the DMatrix passed to Booster.predict.
dtest = DMatrix(data=X_test_xgb, label=y_test)

Load the models

In [None]:
# Saved model artifacts, resolved relative to the current user's home directory
# (consistent with the "~/scratch/..." dataset paths) instead of hardcoding one
# user's absolute home path. expanduser is applied here so the loaders receive a
# fully resolved path rather than a literal "~".
# NOTE: "logistic_regresion" (single "s") is kept exactly as in the original path,
# presumably matching the file name on disk.
lr_filepath = os.path.expanduser("~/scratch/models/full/logistic_regresion_full.joblib")
nn_filepath = os.path.expanduser("~/scratch/models/full/neural_network_full")
xgb_filepath = os.path.expanduser("~/scratch/models/full/xgboost_full.ubj")

In [None]:
# Parameters handed to the Booster constructor before the saved model is loaded.
xgb_params = dict(
    # tree construction algorithm; 'hist' and 'gpu_hist' are recommended for large datasets
    tree_method='hist',

    # --- parameters Hong used ---
    eta=0.3,                        # learning rate
    nthread=5,                      # maximum number of threads to run simultaneously
    eval_metric='auc',              # evaluation metric
    objective='binary:logistic',    # objective function

    # --- parameters Hong optimized for ---
    max_depth=20,                   # max depth of the tree
    # Subsample ratio of columns at each level. Subsampling occurs once for every
    # new depth level reached in a tree; columns are subsampled from the set of
    # columns chosen for the current tree.
    colsample_bylevel=0.05,
)

In [None]:
# Restore the fitted logistic regression (joblib file) and the saved Keras network.
# NOTE(review): joblib.load unpickles its input, so only load trusted artifacts.
lr_clf = load(filename=lr_filepath)
nn_model = load_model(filepath=nn_filepath)

In [None]:
# Create a Booster with the configured parameters, then restore the trained model from disk.
xgb_model = xgb.Booster(params=xgb_params)
xgb_model.load_model(fname=xgb_filepath)

### Test set accuracy

In [None]:
# Accuracy of each model on the full test set.
lr_acc = lr_clf.score(X_test, y_test)

# evaluate() is unpacked as (loss, accuracy) — assumes the network was compiled
# with accuracy as its only metric; TODO confirm against the training notebook.
nn_eval = nn_model.evaluate(X_test, y_test)
nn_loss, nn_acc = nn_eval

# Booster.predict output is rounded to hard labels before scoring.
xgb_test_scores = xgb_model.predict(dtest)
xgb_acc = accuracy_score(y_true=y_test, y_pred=np.round(xgb_test_scores))

In [None]:
# Report accuracies as percentages rounded to two decimals.
lr_acc_pct = np.round(lr_acc*100, 2)
nn_acc_pct = np.round(nn_acc*100, 2)
xgb_acc_pct = np.round(xgb_acc*100, 2)
print(
    f"Test Accuracy:"
    f"\n\tLogistic Regression: {lr_acc_pct}"
    f"\n\tNeural Network: {nn_acc_pct}"
    f"\n\tXGBoost: {xgb_acc_pct}"
)

## AUROC

In [None]:
# For the binary case, pass roc_auc_score the predicted probability of the positive ("1") class
# for each sample; the output of clf.decision_function(X_test) can be passed instead. Example:
#   roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])

### Scores

In [None]:
# Reference value — Hong: AUROC = 0.91
# Each model is scored on continuous outputs (not rounded labels).
lr_pos_proba = lr_clf.predict_proba(X_test)[:, 1]   # column 1 = probability of class "1"
lr_auroc = roc_auc_score(y_true=y_test, y_score=lr_pos_proba)
nn_auroc = roc_auc_score(y_true=y_test, y_score=nn_model.predict(X_test))
xgb_auroc = roc_auc_score(y_true=y_test, y_score=xgb_model.predict(dtest))

In [None]:
# Report AUROC values scaled to percentages and rounded to two decimals.
lr_auroc_pct = np.round(lr_auroc*100, 2)
nn_auroc_pct = np.round(nn_auroc*100, 2)
xgb_auroc_pct = np.round(xgb_auroc*100,2)
print(
    f"Test AUROC:"
    f"\n\tLogistic Regression: {lr_auroc_pct}"
    f"\n\tNeural Network: {nn_auroc_pct}"
    f"\n\tXGBoost: {xgb_auroc_pct}"
)

### Plots

Logistic Regression

In [None]:
# ROC curve for the logistic regression, drawn from its decision-function scores.
# The curve is named and titled so the figure is identifiable on its own
# (the default legend entry is the generic "Classifier").
lr_roc_display = RocCurveDisplay.from_predictions(
    y_test,
    lr_clf.decision_function(X_test),
    name="Logistic Regression",
)
lr_roc_display.ax_.set_title("Test-set ROC curve: Logistic Regression")
plt.show()

Neural Network

In [None]:
# ROC curve for the neural network, drawn from its raw predict() outputs.
# The curve is named and titled so the figure is identifiable on its own
# (the default legend entry is the generic "Classifier").
nn_roc_display = RocCurveDisplay.from_predictions(
    y_test,
    nn_model.predict(X_test),
    name="Neural Network",
)
nn_roc_display.ax_.set_title("Test-set ROC curve: Neural Network")
plt.show()

XGBoost

In [None]:
# ROC curve for XGBoost, drawn from the Booster's predict() outputs on the test DMatrix.
# The curve is named and titled so the figure is identifiable on its own
# (the default legend entry is the generic "Classifier").
xgb_roc_display = RocCurveDisplay.from_predictions(
    y_test,
    xgb_model.predict(dtest),
    name="XGBoost",
)
xgb_roc_display.ax_.set_title("Test-set ROC curve: XGBoost")
plt.show()

### Seniors vs. Adults

In [None]:
# Pickled index lists for the two age groups, resolved relative to the current
# user's home directory (consistent with the "~/scratch/..." dataset paths)
# instead of hardcoding one user's absolute home path. expanduser is required
# because the built-in open() does not expand "~".
senior_idxs_filename = os.path.expanduser("~/scratch/datasets/yale_new_haven/demographic_indices/age_65_and_over.pickle")
adult_idxs_filename = os.path.expanduser("~/scratch/datasets/yale_new_haven/demographic_indices/age_less_than_65.pickle")

In [None]:
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.

# indices of patients 65 and older
with open(senior_idxs_filename, mode='rb') as senior_fh:
    senior_idxs = pickle.load(senior_fh)

# indices from the "age_less_than_65" file
with open(adult_idxs_filename, mode='rb') as adult_fh:
    adult_idxs = pickle.load(adult_fh)

In [None]:
# Row labels of the test set belonging to each age group, found by matching the
# test IDs against the loaded index lists.
# Adults are selected with adult_idxs rather than as "not a senior", so an ID that
# is missing from both lists is not silently counted as an adult (and the loaded
# adult_idxs is actually used).
seniors_test = test_ids[test_ids.isin(senior_idxs)].index
adults_test = test_ids[test_ids.isin(adult_idxs)].index

In [None]:
# One DMatrix per age group: XGBoost features and labels restricted to that group's rows.
dtest_seniors = DMatrix(data=X_test_xgb.loc[seniors_test], label=y_test.loc[seniors_test])
dtest_adults = DMatrix(data=X_test_xgb.loc[adults_test], label=y_test.loc[adults_test])

Seniors vs. Adults Scores

In [None]:
# Slice features and labels once per age group, then score every model on each slice.
X_seniors, y_seniors = X_test.loc[seniors_test], y_test.loc[seniors_test]
X_adults, y_adults = X_test.loc[adults_test], y_test.loc[adults_test]

# Logistic regression
lr_acc_seniors = lr_clf.score(X_seniors, y_seniors)
lr_acc_adults = lr_clf.score(X_adults, y_adults)

# Neural network: evaluate() is unpacked as (loss, accuracy)
nn_loss_seniors, nn_acc_seniors = nn_model.evaluate(X_seniors, y_seniors)
nn_loss_adults, nn_acc_adults = nn_model.evaluate(X_adults, y_adults)

# XGBoost: Booster outputs are rounded to hard labels before scoring
xgb_labels_seniors = np.round(xgb_model.predict(dtest_seniors))
xgb_acc_seniors = accuracy_score(y_seniors, xgb_labels_seniors)
xgb_labels_adults = np.round(xgb_model.predict(dtest_adults))
xgb_acc_adults = accuracy_score(y_adults, xgb_labels_adults)

In [None]:
# Per-group test accuracy (percent, two decimals) for each model, with group sizes.
group_accuracies = (
    ("Logistic Regression", lr_acc_adults, lr_acc_seniors),
    ("Neural Network", nn_acc_adults, nn_acc_seniors),
    ("XGBoost", xgb_acc_adults, xgb_acc_seniors),
)

print("Test Accuracy")
for model_label, adults_acc, seniors_acc in group_accuracies:
    print()
    print(
        f"{model_label}:"
        f"\n\tAdults score: {np.round(adults_acc*100, 2)} (n = {len(adults_test)})"
        f"\n\tSeniors score: {np.round(seniors_acc*100, 2)} (n = {len(seniors_test)})"
    )

### Confusion Matrix

In [None]:
# Logistic-regression class predictions: whole test set, then seniors, then adults.
y_pred_lr, y_pred_lr_seniors, y_pred_lr_adults = (
    lr_clf.predict(features)
    for features in (X_test, X_test.loc[seniors_test], X_test.loc[adults_test])
)

In [None]:
# Neural-network outputs rounded to hard labels: whole test set, then seniors, then adults.
y_pred_nn, y_pred_nn_seniors, y_pred_nn_adults = (
    np.round(nn_model.predict(features))
    for features in (X_test, X_test.loc[seniors_test], X_test.loc[adults_test])
)

In [None]:
# XGBoost outputs rounded to hard labels: whole test set, then seniors, then adults.
y_pred_xgb, y_pred_xgb_seniors, y_pred_xgb_adults = (
    np.round(xgb_model.predict(dmatrix))
    for dmatrix in (dtest, dtest_seniors, dtest_adults)
)

Overall

In [None]:
# Confusion matrices on the whole test set, one figure per model.
# Each figure carries its own title; previously the three model names were printed
# before any figure was drawn, so figures could only be told apart by their order.
norm = 'all'   # normalize over all samples, so the cells of a matrix sum to 1

for cm_label, cm_pred in (
    ("Logistic Regression", y_pred_lr),
    ("Neural Network", y_pred_nn),
    ("XGBoost", y_pred_xgb),
):
    cm_disp = ConfusionMatrixDisplay.from_predictions(y_test, cm_pred, normalize=norm)
    cm_disp.ax_.set_title(f"Overall: {cm_label}")
plt.show()

Seniors

In [None]:
# Confusion matrices restricted to the seniors rows, one figure per model.
# Normalization is 'all' here, matching the overall and adults matrices; this cell
# previously used 'pred', which made the seniors figures not directly comparable
# with the others.
# Each figure carries its own title instead of relying on labels printed up front.
norm = 'all'   # normalize over all samples, so the cells of a matrix sum to 1

for cm_label, cm_pred in (
    ("Logistic Regression", y_pred_lr_seniors),
    ("Neural Network", y_pred_nn_seniors),
    ("XGBoost", y_pred_xgb_seniors),
):
    cm_disp = ConfusionMatrixDisplay.from_predictions(y_test.loc[seniors_test], cm_pred, normalize=norm)
    cm_disp.ax_.set_title(f"Seniors: {cm_label}")
plt.show()

Adults

In [None]:
# Confusion matrices restricted to the adults rows, one figure per model.
# Each figure carries its own title; previously the three model names were printed
# before any figure was drawn, so figures could only be told apart by their order.
norm = 'all'   # normalize over all samples, so the cells of a matrix sum to 1

for cm_label, cm_pred in (
    ("Logistic Regression", y_pred_lr_adults),
    ("Neural Network", y_pred_nn_adults),
    ("XGBoost", y_pred_xgb_adults),
):
    cm_disp = ConfusionMatrixDisplay.from_predictions(y_test.loc[adults_test], cm_pred, normalize=norm)
    cm_disp.ax_.set_title(f"Adults: {cm_label}")
plt.show()

### Feature Importance (XGBoost)

In [None]:
# Plot the highest-ranked features of the loaded XGBoost model.
max_num_features = 15          # how many top features to show
importance_type = 'gain'       # average gain of splits which use the feature

xgb.plot_importance(
    booster=xgb_model,
    importance_type=importance_type,
    max_num_features=max_num_features,
)