In [1]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix, average_precision_score
import joblib
import pandas as pd

In [2]:
# Dataset to evaluate. Supported values: penn, mcmed, physionet.
# The name is interpolated into the data and checkpoint paths used later on.
DATASET = 'mcmed'

In [3]:
# LOAD DATA
# Read the selected dataset's "*_classic" parquet file into a DataFrame.
parquet_path = f'data/{DATASET}_classic.parquet'
df = pd.read_parquet(parquet_path)

In [4]:
# Features: every column except `pat_enc_csn_id` (presumably an encounter
# identifier - confirm against the data dictionary) and the target `label`.
non_feature_columns = ['pat_enc_csn_id', 'label']
X = df.drop(columns=non_feature_columns)

# Target is kept as a single-column DataFrame (list selector), not a Series.
y = df.loc[:, ['label']]

In [13]:
# LOAD MODEL
# Create an empty classifier, then restore its state from the JSON checkpoint
# stored under the selected dataset's checkpoint directory.
model_checkpoint = f'checkpoints/{DATASET}/{DATASET}_xgb.json'
model = XGBClassifier()
model.load_model(model_checkpoint)

In [15]:
# IMPUTE DATA: FILL IN MISSING VALUES
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load imputer checkpoints that come from a trusted source.
imputer = joblib.load(f'checkpoints/{DATASET}/{DATASET}_xgb_imputer.pkl')

# Keep only the columns the model reports as its inputs, in the model's order.
feature_names = model.feature_names_in_
X = X[feature_names]

# Re-wrap the imputed values in a DataFrame. Passing the original index keeps
# the rows of X label-aligned with y; without it the index would silently be
# reset to 0..n-1.
X = pd.DataFrame(imputer.transform(X), columns=feature_names, index=X.index)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [11]:
# Display the feature matrix; pandas truncates the rendering of large frames
# (the elided rows/columns appear as "...").
X

Unnamed: 0,SpO2_min,SpO2_max,SpO2_mean,SpO2_std,SpO2_median,SpO2_skew,SpO2_kurtosis,SpO2_slope,SpO2_qr25,SpO2_qr75,...,HEMOGLOBIN (HGB)_max,HEMOGLOBIN (HGB)_mean,HEMOGLOBIN (HGB)_std,HEMOGLOBIN (HGB)_median,HEMOGLOBIN (HGB)_qr25,HEMOGLOBIN (HGB)_qr75,Age,Gender_ind,Race_ind,Ethnicity_ind
0,96.000000,99.666667,97.935580,0.678416,98.000000,-0.593350,0.416494,-0.002904,97.468238,98.456697,...,11.8,11.8,0.0,11.8,11.8,11.8,50.0,0.0,5.0,0.0
1,96.000000,99.666667,97.935580,0.678416,98.000000,-0.593350,0.416494,-0.002904,97.468238,98.456697,...,14.9,14.9,0.0,14.9,14.9,14.9,23.0,1.0,0.0,2.0
2,96.000000,99.666667,97.935580,0.678416,98.000000,-0.593350,0.416494,-0.002904,97.468238,98.456697,...,14.6,14.6,0.0,14.6,14.6,14.6,62.0,0.0,0.0,2.0
3,96.000000,99.666667,97.935580,0.678416,98.000000,-0.593350,0.416494,-0.002904,97.468238,98.456697,...,13.6,13.6,0.0,13.6,13.6,13.6,54.0,0.0,3.0,0.0
4,96.000000,99.666667,97.935580,0.678416,98.000000,-0.593350,0.416494,-0.002904,97.468238,98.456697,...,15.5,15.5,0.0,15.5,15.5,15.5,73.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6609,97.000000,100.000000,98.666667,1.247219,99.000000,-0.381802,-1.500000,-0.500000,98.000000,99.500000,...,9.3,9.3,0.0,9.3,9.3,9.3,90.0,0.0,3.0,0.0
6610,92.461538,100.000000,96.198634,1.913192,95.870690,0.445772,-0.637296,-0.175202,94.811368,97.526008,...,15.5,15.5,0.0,15.5,15.5,15.5,40.0,1.0,0.0,2.0
6611,93.152542,97.016949,95.398155,0.731448,95.318966,0.107773,-0.007309,0.008575,95.000000,95.894068,...,12.6,12.6,0.0,12.6,12.6,12.6,82.0,1.0,0.0,2.0
6612,86.500000,96.113208,94.452030,1.442614,94.258621,-3.522674,17.762667,-0.015024,94.000000,95.189655,...,10.9,10.9,0.0,10.9,10.9,10.9,88.0,1.0,1.0,0.0


In [12]:
# Display the loaded classifier's repr to inspect its configuration.
model

In [8]:
# INFERENCE
# Per-class probabilities; column 1 is kept as the score used by the
# probability-based metrics.
class_probabilities = model.predict_proba(X)
pred_prob = class_probabilities[:, 1]

# Hard class predictions as returned by the model's own predict().
pred = model.predict(X)

In [9]:
# COMPUTE METRICS
# Metrics computed from the hard class predictions.
conf_matrix = confusion_matrix(y, pred)
acc = accuracy_score(y, pred)
prf_support = precision_recall_fscore_support(y, pred, average='binary')
precision, recall, f1, _ = prf_support

# Metrics computed from the predicted probabilities (threshold-free).
auprc = average_precision_score(y, pred_prob)
auc = roc_auc_score(y, pred_prob)

In [10]:
# One-line summary of the headline metrics, followed by the confusion matrix.
metrics_summary = f"Acc: {acc:.4f}, F1: {f1:.4f}, AUC: {auc:.4f}, AUPRC: {auprc:.4f}"
print(metrics_summary)
print(conf_matrix)

Acc: 0.9793, F1: 0.6496, AUC: 0.8732, AUPRC: 0.6871
[[6350   63]
 [  74  127]]
