In [3]:
import joblib, numpy as np, matplotlib.pyplot as plt
from sklearn.metrics import (
    roc_curve, precision_recall_curve, auc
)
from pathlib import Path

# Resolve project directories relative to the notebook's working directory
# (ROOT is the parent of the current working directory).
ROOT = Path.cwd().parent
DATA = ROOT / "data" / "processed"
MODELS = ROOT / "models"
FIGS = ROOT / "figures"
FIGS.mkdir(exist_ok=True)   # savefig does not create a missing output directory

# Imported next to its use so this cell stays runnable on its own.
# average_precision_score computes the step-wise AP that the "AP=" legend label
# promises; the trapezoidal auc(rec, prec) is a different (interpolated) number.
from sklearn.metrics import average_precision_score

# NOTE(review): going by the file names, these are the training split, so the
# curves below are presumably in-sample and optimistic -- confirm whether a
# held-out split should be used for the model comparison.
X = joblib.load(DATA / "X_train.pkl")
y = joblib.load(DATA / "y_train.pkl")

# NOTE(review): joblib.load unpickles arbitrary objects; only load model files
# from a trusted source.
logreg = joblib.load(MODELS / "logreg.pkl")
xgb    = joblib.load(MODELS / "xgb_optuna_best.pkl")

# Second column of predict_proba -- presumably the positive-class probability
# for a binary target; confirm against the classifiers' classes_.
proba_lr = logreg.predict_proba(X)[:, 1]
proba_xgb = xgb.predict_proba(X)[:, 1]

# (legend name, scores) pairs shared by both figures.
scored = [('LogReg', proba_lr), ('XGB', proba_xgb)]

# ROC curves, one line per model plus the chance diagonal.
fig, ax = plt.subplots()
for name, p in scored:
    fpr, tpr, _ = roc_curve(y, p)
    ax.plot(fpr, tpr, label=f'{name} AUC={auc(fpr,tpr):.3f}')
ax.plot([0, 1], [0, 1], 'k--')
ax.set(title="ROC Curve", xlabel="False positive rate", ylabel="True positive rate")
ax.legend()
fig.savefig(FIGS / "roc_compare.png", dpi=150)
plt.close(fig)   # release the figure; nothing is shown inline

# Precision-recall curves, labelled with average precision.
fig, ax = plt.subplots()
for name, p in scored:
    prec, rec, _ = precision_recall_curve(y, p)
    ax.plot(rec, prec, label=f'{name} AP={average_precision_score(y, p):.3f}')
ax.set(title="PR Curve", xlabel="Recall", ylabel="Precision")
ax.legend()
fig.savefig(FIGS / "pr_compare.png", dpi=150)
plt.close(fig)


In [6]:
# lgbm_compare

# ── Robust ROC / PR comparison ─────────────────────────────────────────
from pathlib import Path
import joblib, lightgbm as lgbm, matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, precision_recall_curve, auc
import numpy as np

# Project directories, all derived from the parent of the working directory.
ROOT = Path.cwd().parent              # notebooks/ → project root
DATA = ROOT.joinpath("data", "processed")
MODELS = ROOT.joinpath("models")
FIGS = ROOT.joinpath("figures")
FIGS.mkdir(exist_ok=True)             # create the output folder on first run

# ----------------- helper: smart loader -------------------------------
def load_model(path: "str | Path") -> tuple:
    """Load a saved model and wrap it behind a uniform scoring callable.

    Parameters
    ----------
    path : str or Path
        Location of the model file. The suffix (compared case-insensitively)
        selects the loader: ``.pkl`` is read with ``joblib.load`` and scored
        through ``predict_proba``; ``.txt`` is read as a LightGBM ``Booster``
        and scored through ``predict``.

    Returns
    -------
    tuple
        ``(name, proba_fn)`` where ``name`` is the file stem (e.g. ``logreg``,
        ``xgb_optuna_best``) and ``proba_fn(X)`` returns
        ``clf.predict_proba(X)[:, 1]`` for pickled models or
        ``booster.predict(X)`` for LightGBM boosters.

    Raises
    ------
    ValueError
        If the suffix is neither ``.pkl`` nor ``.txt``.
    """
    path = Path(path)                   # accept plain strings as well as Path objects
    name = path.stem
    suffix = path.suffix.lower()

    if suffix == ".pkl":
        # NOTE(review): joblib.load unpickles arbitrary objects; only point
        # this at model files from a trusted source.
        clf = joblib.load(path)
        return name, lambda X: clf.predict_proba(X)[:, 1]

    if suffix == ".txt":
        booster = lgbm.Booster(model_file=str(path))
        # Presumably the booster was trained with a binary objective, so that
        # predict() yields positive-class probabilities -- confirm.
        return name, lambda X: booster.predict(X)

    raise ValueError(f"Unknown model file: {path}")

# ----------------- load data & models ---------------------------------
# Imported next to its use so this cell stays runnable on its own.
# average_precision_score computes the step-wise AP that the "AP=" legend label
# promises; the trapezoidal auc(rec, prec) is a different (interpolated) number.
from sklearn.metrics import average_precision_score

# NOTE(review): going by the file names, these are the training split, so the
# curves below are presumably in-sample and optimistic -- confirm whether a
# held-out split should be used for the model comparison.
X = joblib.load(DATA / "X_train.pkl")
y = joblib.load(DATA / "y_train.pkl")

models = [
    load_model(MODELS / "logreg.pkl"),
    load_model(MODELS / "xgb_optuna_best.pkl"),
    load_model(MODELS / "lgbm_optuna_best.txt"),   # change to _baseline.txt if needed
]

# Score every model exactly once; both figures reuse the same predictions
# instead of re-running inference per plot.
scored = [(name, proba_fn(X)) for name, proba_fn in models]

# ----------------- ROC curve ------------------------------------------
fig, ax = plt.subplots()
for name, proba in scored:
    fpr, tpr, _ = roc_curve(y, proba)
    ax.plot(fpr, tpr, label=f"{name} AUC={auc(fpr,tpr):.3f}")
ax.plot([0, 1], [0, 1], 'k--')          # chance diagonal
ax.set(title="ROC Curve", xlabel="False positive rate", ylabel="True positive rate")
ax.legend()
fig.savefig(FIGS / "roc_compare.png", dpi=150)
plt.close(fig)

# ----------------- PR curve -------------------------------------------
fig, ax = plt.subplots()
for name, proba in scored:
    prec, rec, _ = precision_recall_curve(y, proba)
    ax.plot(rec, prec, label=f"{name} AP={average_precision_score(y, proba):.3f}")
ax.set(title="PR Curve", xlabel="Recall", ylabel="Precision")
ax.legend()
fig.savefig(FIGS / "pr_compare.png", dpi=150)
plt.close(fig)

print("✅ ROC & PR curves saved to figures/ directory")


✅ ROC & PR curves saved to figures/ directory
