# ML Assignment 2 — Interactive Model Evaluation (Notebook Version)

This is the **notebook-friendly** version of `app.py`.  
Streamlit `st.*` calls are replaced with standard notebook outputs so you can **see everything inline**.

Run each cell sequentially to load models, evaluate, and visualise results.


## 1. Imports


In [None]:
from __future__ import annotations
import json
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    matthews_corrcoef,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
    ConfusionMatrixDisplay,
)

print("All imports successful ✅")


## 2. Setup Paths & Load Metadata


In [None]:
# Locate the project root. Start from the current working directory; if it has
# no "model" folder but its parent does, use the parent instead.
PROJECT_ROOT = Path.cwd()
if not (PROJECT_ROOT / "model").exists() and (PROJECT_ROOT.parent / "model").exists():
    PROJECT_ROOT = PROJECT_ROOT.parent

MODEL_DIR = PROJECT_ROOT / "model"
DATA_DIR = PROJECT_ROOT / "data"

print(f"MODEL_DIR : {MODEL_DIR}")
print(f"DATA_DIR  : {DATA_DIR}")

# Load dataset metadata. The encoding is passed explicitly so the file is
# decoded as UTF-8 everywhere instead of with the platform's locale default
# (which is not UTF-8 on some systems, e.g. Windows).
with open(MODEL_DIR / "dataset_metadata.json", encoding="utf-8") as f:
    metadata = json.load(f)

print(f"\nDataset   : {metadata['dataset_name']}")
print(f"Instances : {metadata['instances']}")
print(f"Features  : {metadata['features']}")
print(f"Classes   : {metadata['target_names']}")


## 3. Available Models


In [None]:
# Display name of each model -> file name of its saved pickle.
MODEL_FILE_MAP = {
    "Logistic Regression": "logistic_regression.pkl",
    "Decision Tree": "decision_tree.pkl",
    "kNN": "knn.pkl",
    "Naive Bayes": "naive_bayes.pkl",
    "Random Forest (Ensemble)": "random_forest_ensemble.pkl",
    "XGBoost (Ensemble)": "xgboost_ensemble.pkl",
}

# Numbered menu of the display names, in the order they are declared above.
print("Available models:")
menu_lines = [
    f"  {position}. {label}"
    for position, label in enumerate(MODEL_FILE_MAP, start=1)
]
print("\n".join(menu_lines))


## 4. Load Precomputed Metrics Table


In [None]:
# Columns that receive the four-decimal format and the green gradient.
score_columns = ["accuracy", "auc", "precision", "recall", "f1", "mcc"]
four_decimals = dict.fromkeys(score_columns, "{:.4f}")

metrics_df = pd.read_csv(MODEL_DIR / "model_metrics.csv")

# Last expression of the cell: the notebook renders the resulting Styler.
metrics_df.style.format(four_decimals).background_gradient(
    cmap="Greens",
    subset=score_columns,
)


## 5. Load Test Data


In [None]:
# Read the test-set CSV; every column except "target" is treated as a feature.
test_df = pd.read_csv(DATA_DIR / "test_data.csv")
is_feature = test_df.columns != "target"
feature_cols = test_df.columns[is_feature].tolist()

n_samples, n_features = len(test_df), len(feature_cols)
print(f"Test samples : {n_samples}")
print(f"Feature cols : {n_features}")

# Last expression of the cell: show the first rows inline.
test_df.head()


## 6. Choose a Model to Evaluate

Change the value of `SELECTED_MODEL` below to any of the 6 model names listed in section 3 (the keys of `MODEL_FILE_MAP`), then re-run this cell and all the cells below it.


In [None]:
# ⬇️ Change this to evaluate a different model
SELECTED_MODEL = "Logistic Regression"

# Fail early with the list of accepted names: the bare KeyError raised by the
# dict lookup alone does not tell the reader which values are valid.
if SELECTED_MODEL not in MODEL_FILE_MAP:
    raise KeyError(
        f"Unknown model {SELECTED_MODEL!r}; choose one of: {', '.join(MODEL_FILE_MAP)}"
    )

# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code. Only load model files you trust.
model = joblib.load(MODEL_DIR / MODEL_FILE_MAP[SELECTED_MODEL])
print(f"Loaded: {SELECTED_MODEL}")
print(model)


## 7. Generate Predictions


In [None]:
# Split the test frame into model inputs and the reference labels.
X_eval, y_true = test_df[feature_cols], test_df["target"]

# Hard class predictions, plus the score taken from column 1 of predict_proba.
y_pred = model.predict(X_eval)
class_probabilities = model.predict_proba(X_eval)
y_prob = class_probabilities[:, 1]

# Quick sanity check: output shape and the first ten predicted vs. actual labels.
first_predicted = y_pred[:10].tolist()
first_actual = y_true.values[:10].tolist()
print(f"Predictions shape : {y_pred.shape}")
print(f"First 10 predicted: {first_predicted}")
print(f"First 10 actual   : {first_actual}")


## 8. Evaluation Metrics


In [None]:
# Scores for the selected model. AUC is computed from the probability scores
# (y_prob); every other metric is computed from the hard predictions (y_pred).
metrics = {
    "Accuracy":  accuracy_score(y_true, y_pred),
    "AUC":       roc_auc_score(y_true, y_prob),
    "Precision": precision_score(y_true, y_pred),
    "Recall":    recall_score(y_true, y_pred),
    "F1":        f1_score(y_true, y_pred),
    "MCC":       matthews_corrcoef(y_true, y_pred),
}

print(f"\n  Metrics for: {SELECTED_MODEL}")
# Single quotes inside the replacement field: reusing the f-string's own
# double quote there is a SyntaxError on Python versions before 3.12.
print(f"  {'-' * 40}")
for k, v in metrics.items():
    # Right-align the metric name in a 10-character column.
    print(f"  {k:>10s} : {v:.4f}")


## 9. Classification Report


In [None]:
# Build the classification report as a dict, then turn it into a table with
# one row per report entry.
class_labels = ["malignant", "benign"]
report = classification_report(
    y_true,
    y_pred,
    target_names=class_labels,
    output_dict=True,
)
report_df = pd.DataFrame(report).T

# Last expression of the cell: the notebook renders the resulting Styler.
report_df.style.format("{:.4f}").background_gradient(cmap="Blues")


## 10. Confusion Matrix


In [None]:
# Confusion matrix of the selected model's predictions, drawn without a colour bar.
cm = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(5, 4))
cm_display = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["malignant", "benign"],
)
cm_display.plot(ax=ax, cmap="Blues", colorbar=False)
ax.set_title(f"{SELECTED_MODEL} — Confusion Matrix")
fig.tight_layout()
plt.show()


## 11. ROC Curve


In [None]:
# ROC curve of the selected model; the thresholds returned by roc_curve are unused.
fpr, tpr, _ = roc_curve(y_true, y_prob)
auc_label = f"AUC = {metrics['AUC']:.4f}"

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, linewidth=2, label=auc_label)
# Dashed diagonal from (0, 0) to (1, 1) as a reference line.
ax.plot([0, 1], [0, 1], "k--", linewidth=1)
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title=f"{SELECTED_MODEL} — ROC Curve",
)
ax.legend(loc="lower right")
fig.tight_layout()
plt.show()


## 12. Prediction Preview


In [None]:
# Copy of the test data with the prediction, and whether it matches the target,
# added as extra columns.
preview = test_df.assign(prediction=y_pred)
preview["correct"] = preview["target"].eq(preview["prediction"])

hits, total = preview["correct"].sum(), len(preview)
print(f"Correct: {hits} / {total}")

# Last expression of the cell: show the first 20 rows inline.
preview.head(20)


## 13. Compare All Models on Test Set

Loads every saved model and evaluates each one on the same test set, so you can compare all of them side by side.


In [None]:
# Score columns, in the order they appear in the comparison table.
score_columns = ["accuracy", "auc", "precision", "recall", "f1", "mcc"]

# One result row per saved model, all evaluated on the same X_eval / y_true.
all_eval = []
for label, filename in MODEL_FILE_MAP.items():
    estimator = joblib.load(MODEL_DIR / filename)
    predicted = estimator.predict(X_eval)
    scores = estimator.predict_proba(X_eval)[:, 1]
    # Values are listed in the same order as score_columns; only AUC uses
    # the probability scores.
    values = (
        accuracy_score(y_true, predicted),
        roc_auc_score(y_true, scores),
        precision_score(y_true, predicted),
        recall_score(y_true, predicted),
        f1_score(y_true, predicted),
        matthews_corrcoef(y_true, predicted),
    )
    all_eval.append({"model_name": label, **dict(zip(score_columns, values))})

# Highest accuracy first, with a fresh 0..n-1 index.
ranked = pd.DataFrame(all_eval).sort_values("accuracy", ascending=False)
compare_df = ranked.reset_index(drop=True)

# Last expression of the cell: the notebook renders the resulting Styler.
compare_df.style.format(dict.fromkeys(score_columns, "{:.4f}")).background_gradient(
    cmap="Greens",
    subset=score_columns,
)


## 14. All Models — ROC Overlay


In [None]:
# Overlay the ROC curve of every saved model on a single set of axes.
fig, ax = plt.subplots(figsize=(7, 5))
for label, filename in MODEL_FILE_MAP.items():
    estimator = joblib.load(MODEL_DIR / filename)
    scores = estimator.predict_proba(X_eval)[:, 1]
    false_pos, true_pos, _ = roc_curve(y_true, scores)
    auc_val = roc_auc_score(y_true, scores)
    legend_entry = f"{label} (AUC={auc_val:.4f})"
    ax.plot(false_pos, true_pos, linewidth=2, label=legend_entry)

# Dashed diagonal from (0, 0) to (1, 1) as a reference line.
ax.plot([0, 1], [0, 1], "k--", linewidth=1)
ax.set(
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
    title="ROC Curves — All Models",
)
ax.legend(loc="lower right", fontsize=8)
fig.tight_layout()
plt.show()


---

✅ **Done!** You have verified all models interactively.  
To run the actual Streamlit web UI, use: `streamlit run app.py`
