## Objective
This notebook evaluates trained predictive models beyond raw performance
metrics, focusing on interpretability, robustness, and clinical plausibility.

Key goals:
- Visualize ROC and Precision–Recall performance
- Examine feature importance and coefficients
- Interpret missingness-aware features
- Assess whether models rely on clinically sensible signals

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import StratifiedKFold, cross_val_predict

In [None]:
def _read_feature_names(csv_path):
    """Return the first column of a header-less CSV file as a plain list."""
    names = pd.read_csv(csv_path, header=None)
    return names[0].tolist()


# Baseline table; the outcome vector is taken from it.
df = pd.read_csv("../data/processed/clean_baseline.csv")

TARGET_COLUMN = "target"  # <-- confirm column name
y = df[TARGET_COLUMN].values

# Saved feature matrices, one per feature set.
# NOTE(review): rows are presumably aligned with the rows of `df` — confirm.
X_full = np.load("../data/processed/X_full.npy")
X_no_glucose = np.load("../data/processed/X_no_glucose.npy")
X_missingness = np.load("../data/processed/X_missingness.npy")

# Feature-name lists stored next to each matrix.
# NOTE(review): presumably one name per matrix column, in column order — confirm.
features_full = _read_feature_names("../data/processed/features_full.csv")
features_no_glucose = _read_feature_names("../data/processed/features_no_glucose.csv")
features_missingness = _read_feature_names("../data/processed/features_missingness.csv")

In [None]:
# Fit a logistic regression and a random forest on the full feature matrix.
# The forest is seeded so repeated runs give the same trees.
rf_settings = {"n_estimators": 300, "min_samples_leaf": 5, "random_state": 42}

lr = LogisticRegression(max_iter=1000)
rf = RandomForestClassifier(**rf_settings)

lr.fit(X_full, y)
rf.fit(X_full, y)

In [None]:
# Score each model on rows it was NOT fit on.
#
# Calling predict_proba on the training rows reports in-sample performance,
# which is optimistic (a random forest can come close to memorising its
# training data). cross_val_predict instead fits a fresh clone of the
# estimator on each training fold and predicts the held-out fold, so every row
# gets an out-of-fold probability. The already-fitted `lr` and `rf` are left
# untouched and can still be inspected for coefficients / importances.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Column 1 of predict_proba is the probability of the second class in
# classes_ (the positive class for a 0/1 target).
y_prob_lr = cross_val_predict(
    lr, X_full, y, cv=cv, method="predict_proba"
)[:, 1]
y_prob_rf = cross_val_predict(
    rf, X_full, y, cv=cv, method="predict_proba"
)[:, 1]

# ROC curve points (thresholds are discarded) and the area under each curve.
fpr_lr, tpr_lr, _ = roc_curve(y, y_prob_lr)
fpr_rf, tpr_rf, _ = roc_curve(y, y_prob_rf)

auc_lr = roc_auc_score(y, y_prob_lr)
auc_rf = roc_auc_score(y, y_prob_rf)

In [None]:
# ROC curves for both models on one set of axes.
fig, ax = plt.subplots(figsize=(7, 6))

ax.plot(fpr_lr, tpr_lr, label=f"Logistic Regression (AUC={auc_lr:.3f})")
ax.plot(fpr_rf, tpr_rf, label=f"Random Forest (AUC={auc_rf:.3f})")

# Dashed diagonal: reference line for a classifier with no discrimination.
ax.plot([0, 1], [0, 1], linestyle="--", color="gray")

ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve Comparison")
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Precision/recall points and average precision per model.
# precision_recall_curve also returns thresholds; they are not needed here.
prec_lr, rec_lr = precision_recall_curve(y, y_prob_lr)[:2]
ap_lr = average_precision_score(y, y_prob_lr)

prec_rf, rec_rf = precision_recall_curve(y, y_prob_rf)[:2]
ap_rf = average_precision_score(y, y_prob_rf)

In [None]:
# Precision–recall curves for both models on one set of axes.
fig, ax = plt.subplots(figsize=(7, 6))

ax.plot(rec_lr, prec_lr, label=f"Logistic Regression (AP={ap_lr:.3f})")
ax.plot(rec_rf, prec_rf, label=f"Random Forest (AP={ap_rf:.3f})")

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision–Recall Curve")
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Pair every feature name with its logistic-regression coefficient
# (first row of coef_), ordered from most positive to most negative.
coef_df = pd.DataFrame(
    {"feature": features_full, "coefficient": lr.coef_[0]}
)
coef_df = coef_df.sort_values(by="coefficient", ascending=False)

# Ten largest (most positive) coefficients.
coef_df.iloc[:10]

In [None]:
# Last ten rows of the coefficient table.
coef_df.iloc[-10:]

In [None]:
# Random-forest feature importances, one row per feature, largest first.
importance_df = pd.DataFrame(
    {"feature": features_full, "importance": rf.feature_importances_}
)
importance_df = importance_df.sort_values(by="importance", ascending=False)

# Fifteen highest-ranked features.
importance_df.iloc[:15]

In [None]:
# Horizontal bar chart of the fifteen highest-ranked forest features.
top_features = importance_df.head(15)

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=top_features, x="importance", y="feature", ax=ax)
ax.set_title("Top 15 Feature Importances – Random Forest")

fig.tight_layout()
plt.show()

In [None]:
# Features whose name ends in "_missing".
# NOTE(review): presumably these are the missingness-indicator columns — confirm.
missingness_features = [
    name for name in features_full if name.endswith("_missing")
]

# Keep only those rows of the importance table and show the first ten;
# the table's existing row order is preserved.
is_missingness_row = importance_df["feature"].isin(missingness_features)
importance_df.loc[is_missingness_row].head(10)

In [None]:
# Random forest on the X_no_glucose feature matrix, with the same
# hyperparameters and seed as the forest fit on X_full.
rf_no_glucose = RandomForestClassifier(
    n_estimators=300,
    min_samples_leaf=5,
    random_state=42
)

# Keep a model fitted on all rows available for later inspection.
rf_no_glucose.fit(X_no_glucose, y)

# Estimate discrimination from out-of-fold probabilities rather than from
# predictions on the training rows: in-sample AUC is optimistic and would hide
# how much performance really changes with this feature set.
# cross_val_predict fits clones, so `rf_no_glucose` itself is not refitted.
# The seeded, stratified split keeps the estimate reproducible.
cv_no_glucose = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
y_prob_ng = cross_val_predict(
    rf_no_glucose, X_no_glucose, y, cv=cv_no_glucose, method="predict_proba"
)[:, 1]

# Out-of-fold ROC AUC for this feature set.
roc_auc_score(y, y_prob_ng)