In [1]:
import os
import sys
from pathlib import Path

import joblib
import matplotlib.pyplot as plt  # Import matplotlib for plotting
import numpy as np
import pandas as pd
import seaborn as sns  # Import Seaborn for visualization
import shap  # Import SHAP library
from catboost import CatBoostClassifier
from scipy import sparse as sp
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV, train_test_split, cross_validate

# Add the project root (two levels above the working directory) to the Python
# path. Presumably this is what lets the pickled processor resolve
# project-local classes -- TODO confirm. The membership check keeps a re-run of
# this cell from appending the same entry again.
project_root = Path.cwd().parent.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Directories for the data files and the saved artifacts. The defaults are the
# original machine-specific locations; set ED_DATA_DIR / ED_MODELS_DIR to run
# the notebook elsewhere without editing this cell. The values stay plain
# strings so the f-string path building below works unchanged.
path_data = os.environ.get("ED_DATA_DIR", "/Users/jeancherizol/MADS/699/all_data")
path_models = os.environ.get(
    "ED_MODELS_DIR",
    "/Users/jeancherizol/MADS/699/SIADS/emergency-dept-optimization/models",
)
path = path_data  # legacy alias: originally a second copy of the data directory

# Load the fitted preprocessing object.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
processor_filename = f"{path_models}/data_processor.joblib"
processor = joblib.load(processor_filename)

# Load the test features and the binary wait-time labels
test_data_filename = f"{path_data}/test_df.csv"
y_test_data_filename = f"{path_data}/y_test_waittime_classification.csv"

X_test = pd.read_csv(test_data_filename, low_memory=False)
y_test = pd.read_csv(y_test_data_filename, low_memory=False)['WAITTIME_BINARY']

# Features and labels come from two separate files; a row-count mismatch would
# otherwise only surface later, when the metrics are computed.
if len(X_test) != len(y_test):
    raise ValueError(
        f"Row count mismatch: X_test has {len(X_test)} rows but y_test has {len(y_test)}"
    )

print("Test data loaded successfully.")

# Transform the test data using the loaded processor
X_test_preprocessed = processor.transform(X_test)

Test data loaded successfully.


In [2]:
# Names of the selected most-important features. Despite the "60" in the
# variable name, the list holds 58 entries. An earlier revision of this list
# was identical except that it did not include the leading 'LOV'.
top_60_features = ['LOV', 'ARREMS', 'ARRTIME_IN_HOUR', 'DIFFEHRE', 'PATWT', 'DIAGSCRN', 'CSTRATM', 'EDINFO', 'NUMGIV', 'CPSUM', 'PHYSPRACTRIA', 'TOTDIAG', 'HOSPCODE', 'EMRED', 'IMMEDR', 'PULSE', 'WIRELESS', 'EMEDRES', 'BPSYS', 'EMSGE', 'RFV23D', 'RETREFFU', 'OTHPROV', 'MSA', 'EDPRIM', 'IVFLUIDS', 'FASTTRAK', 'TOTPROC', 'ESCRIPE', 'VMONTH', 'RFV33D', 'EREMINDE', 'EMSGER', 'BEDDATA', 'ADMDIV', 'MED1', 'DIAG3_-9', 'EGRAPHE', 'EBILLANYE', 'ESHAREPROVE2', 'EIDPTE', 'KIOSELCHK', 'CTHEAD', 'PAYMCAID', 'ECQMER', 'RFV2', 'AMBDIV', 'BPDIASD', 'ESHAREPROVE4', 'RFV1', 'PAYTYPER', 'URINECX', 'OTHERBLD', 'PROC', 'EPTRECE', 'RFV13D', 'ADVTRIAG', 'ESHAREPROVE7']

# Column positions of the selected features in the preprocessed matrix
# (presumably position i here corresponds to name i above -- TODO confirm
# against the fitted processor's output columns).
feature_indices = [2, 13, 476, 271, 475, 96, 473, 342, 212, 474, 365, 134, 265, 268, 31, 26, 370, 378, 28, 321, 40, 234, 230, 380, 341, 143, 366, 150, 291, 0, 41, 287, 322, 375, 356, 152, 3631, 303, 267, 333, 313, 361, 126, 18, 312, 35, 353, 220, 335, 34, 24, 119, 110, 135, 323, 39, 364, 338]

# Fail early, with a clear message, if the two hand-maintained lists drift
# apart or an index points past the last preprocessed column.
if len(top_60_features) != len(feature_indices):
    raise ValueError(
        f"{len(top_60_features)} feature names but {len(feature_indices)} feature indices"
    )
n_preprocessed_columns = X_test_preprocessed.shape[1]
if max(feature_indices) >= n_preprocessed_columns:
    raise IndexError(
        f"feature index {max(feature_indices)} is out of range for "
        f"{n_preprocessed_columns} preprocessed columns"
    )

# Keep only the selected columns, in the order given by feature_indices
X_test_selected_features = X_test_preprocessed[:, feature_indices]

In [3]:
# Location of the persisted CatBoost wait-time classifier
model_filename = f"{path_models}/best_waittime_classification_catboost_model.joblib"

# Restore the fitted model from disk
model = joblib.load(model_filename)
print("Model loaded successfully.")

# Score the selected test features: column 1 of predict_proba (used as the
# positive-class probability) first, then the hard class labels
y_pred_proba = model.predict_proba(X_test_selected_features)[:, 1]
y_pred = model.predict(X_test_selected_features)

# Overall metrics: ROC AUC from the probabilities, the rest from the hard
# labels with support-weighted averaging across classes.
# NOTE(review): these names and the printed heading say "validation", but the
# inputs are y_test and the test-set predictions -- confirm the intended label.
roc_auc_validation = roc_auc_score(y_test, y_pred_proba)
recall_validation = recall_score(y_test, y_pred, average='weighted')
precision_validation = precision_score(y_test, y_pred, average='weighted')
f1_validation = f1_score(y_test, y_pred, average='weighted')

# One-line summary of the overall metrics
print(f"Validation Metrics:\nF1 (Weighted): {f1_validation:.2f}, ROC AUC: {roc_auc_validation:.2f}, Precision (Weighted): {precision_validation:.2f}, Recall (Weighted): {recall_validation:.2f}")

# Per-class breakdown: the text table first, then the same report as a dict
print(classification_report(y_test, y_pred))
report = classification_report(y_test, y_pred, output_dict=True)

# Print one line per class, skipping the aggregate rows of the report
for label, metrics in report.items():
    if label in ('accuracy', 'macro avg', 'weighted avg'):
        continue
    print(f"Class {label} - Precision: {metrics['precision']:.2f}, Recall: {metrics['recall']:.2f}, F1-score: {metrics['f1-score']:.2f}")


Model loaded successfully.
Validation Metrics:
F1 (Weighted): 0.64, ROC AUC: 0.67, Precision (Weighted): 0.69, Recall (Weighted): 0.62
              precision    recall  f1-score   support

           0       0.81      0.61      0.70      2573
           1       0.39      0.63      0.48      1005

    accuracy                           0.62      3578
   macro avg       0.60      0.62      0.59      3578
weighted avg       0.69      0.62      0.64      3578

Class 0 - Precision: 0.81, Recall: 0.61, F1-score: 0.70
Class 1 - Precision: 0.39, Recall: 0.63, F1-score: 0.48


In [11]:

# This cell does not load anything itself: it uses the `model`,
# `X_test_selected_features` and `y_test` objects already present in the kernel.

# Hard class labels and column-1 probabilities (the latter feed ROC AUC)
y_test_pred = model.predict(X_test_selected_features)
y_test_proba = model.predict_proba(X_test_selected_features)[:, 1]

# Support-weighted F1 / precision / recall, computed in that order from the
# hard labels; ROC AUC is computed from the probabilities
f1_test, precision_test, recall_test = (
    scorer(y_test, y_test_pred, average='weighted')
    for scorer in (f1_score, precision_score, recall_score)
)
roc_auc_test = roc_auc_score(y_test, y_test_proba)

# One-line summary of the overall metrics
print(f"Test Metrics:\nF1 (Weighted): {f1_test:.2f}, ROC AUC: {roc_auc_test:.2f}, Precision (Weighted): {precision_test:.2f}, Recall (Weighted): {recall_test:.2f}")

# Text table of the per-class report
print(classification_report(y_test, y_test_pred))

# Same report as a dict; drop the aggregate rows and print one line per class
report = classification_report(y_test, y_test_pred, output_dict=True)
per_class_rows = {
    name: scores
    for name, scores in report.items()
    if name not in ('accuracy', 'macro avg', 'weighted avg')
}
for label, metrics in per_class_rows.items():
    print(f"Class {label} - Precision: {metrics['precision']:.2f}, Recall: {metrics['recall']:.2f}, F1-score: {metrics['f1-score']:.2f}")


Test Metrics:
F1 (Weighted): 0.64, ROC AUC: 0.67, Precision (Weighted): 0.69, Recall (Weighted): 0.62
              precision    recall  f1-score   support

           0       0.81      0.61      0.70      2573
           1       0.39      0.63      0.48      1005

    accuracy                           0.62      3578
   macro avg       0.60      0.62      0.59      3578
weighted avg       0.69      0.62      0.64      3578

Class 0 - Precision: 0.81, Recall: 0.61, F1-score: 0.70
Class 1 - Precision: 0.39, Recall: 0.63, F1-score: 0.48
