In [1]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix, average_precision_score
import joblib
import pandas as pd

In [2]:
# Dataset key interpolated into the data and checkpoint paths used by the cells below.
# Options noted by the author: penn, mcmed, physionet.
DATASET = 'mcmed'

In [3]:
# LOAD DATA
# Read the parquet table for the selected dataset; the path is built from DATASET.
parquet_path = f'data/{DATASET}_classic.parquet'
df = pd.read_parquet(parquet_path)

In [4]:
# Split the loaded frame into model inputs (X) and target (y).
# Only the identifier column (presumably an encounter id — verify) and the
# label are excluded from X; every other column is kept.
non_feature_cols = ['pat_enc_csn_id', 'label']
X = df.drop(non_feature_cols, axis=1)
# A list selector keeps y as a one-column DataFrame rather than a Series.
y = df.loc[:, ['label']]

In [10]:
# LOAD MODEL
# joblib.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): the recorded output of this cell links to scikit-learn's
# model-persistence limitations page — presumably a version-mismatch warning;
# confirm the installed scikit-learn matches the one used to save the model.
model_path = f'checkpoints/{DATASET}/{DATASET}_rf.pkl'
model = joblib.load(model_path)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [11]:
# IMPUTE DATA: FILL IN MISSING VALUES
imputer = joblib.load(f'checkpoints/{DATASET}/{DATASET}_rf_imputer.pkl')
# Select and order the columns by the feature names recorded on the loaded model.
feature_cols = model.feature_names_in_
# The transform() result is wrapped back into a DataFrame so the column names
# are kept. NOTE(review): if transform() returns a plain array, the rebuilt
# frame has a default index rather than the index of df — confirm nothing
# downstream aligns X and y by index.
X = pd.DataFrame(imputer.transform(X[feature_cols]), columns=feature_cols)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [7]:
# INFERENCE
# NOTE(review): this cell's execution count (7) is lower than the model-loading
# cell's (10); re-run the notebook top to bottom before trusting saved outputs.
# Column 1 of predict_proba is used as the positive-class score
# (assumes a binary model whose second class is the positive label — TODO confirm).
class_probs = model.predict_proba(X)
pred_prob = class_probs[:, 1]
# Hard class labels from the same inputs.
pred = model.predict(X)

In [8]:
# COMPUTE METRICS
# Scores computed from the positive-class probabilities.
auc = roc_auc_score(y_true=y, y_score=pred_prob)
auprc = average_precision_score(y_true=y, y_score=pred_prob)

# Scores computed from the hard predictions.
acc = accuracy_score(y_true=y, y_pred=pred)
conf_matrix = confusion_matrix(y_true=y, y_pred=pred)
# The fourth element of the returned tuple is discarded.
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true=y, y_pred=pred, average='binary'
)

In [9]:
# Report the headline metrics to four decimals, then the raw confusion matrix.
summary = f"Acc: {acc:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}, AUPRC: {auprc:.4f}"
print(summary)
print(conf_matrix)

Acc: 0.9861, F1: 0.7143, AUC: 0.8911, AUPRC: 0.6899
[[6407    6]
 [  86  115]]
