# ADS 504 Team Project
## Evaluation & Comparison Notebook
Author: Darren Chen & Team

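This notebook loads the previously trained models and compares them using stratified 5-fold cross-validated ROC AUC, ROC and precision-recall curves, a calibration curve, and a recall-targeted threshold analysis.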

### Table of Contents
1. Setup
2. Load Models & Data
3. Cross-Validation & Metrics
4. ROC & PR Curves
5. Calibration & Threshold Analysis
6. Model Selection & Discussion
7. Deployment Considerations

In [None]:

# Section 1: Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (accuracy_score, roc_auc_score, precision_recall_curve,
                             roc_curve, confusion_matrix, classification_report,
                             average_precision_score)
import joblib, json


In [None]:

# Section 2: Load Data & Models
# Reads the modelling dataset, splits it into features/target, and loads every
# serialized model (`model_<name>.joblib`) plus the baseline results JSON from
# the current working directory.
import glob, os

df = pd.read_csv('/mnt/data/diabetes_df.csv')
TARGET = 'COMORBID'  # update if needed
if TARGET not in df.columns:
    # Fail with an actionable message instead of a bare KeyError on the lookup below.
    raise KeyError(f"Target column {TARGET!r} not found; available columns: {list(df.columns)}")
y = df[TARGET]
X = df.drop(columns=[TARGET])

# glob returns files in arbitrary, filesystem-dependent order; sort so the
# `models` dict (and every table/plot derived from it) is reproducible.
model_files = sorted(glob.glob('model_*.joblib'))
if not model_files:
    # Without this guard the notebook only fails later, in the CV cell, with an
    # unrelated-looking KeyError on an empty results frame.
    raise FileNotFoundError(
        f"No 'model_*.joblib' files found in {os.getcwd()!r}; run the training notebook first."
    )

# SECURITY NOTE: joblib.load unpickles the file and can execute arbitrary code.
# Only load model files produced by a trusted training run.
# Key = the part of the file name between the 'model_' prefix and '.joblib'.
models = {os.path.basename(f).split('_',1)[1].split('.joblib')[0]: joblib.load(f) for f in model_files}

# Explicit encoding so the read does not depend on the platform default.
with open('model_results.json', encoding='utf-8') as f:
    base_results = json.load(f)
print('Loaded models:', list(models.keys()))


In [None]:

# Section 3: Cross-Validation & Metrics
# Re-estimates each loaded model with stratified 5-fold CV on (X, y) and
# collects the mean / std of the per-fold ROC AUC into `cv_df`.
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline  # was missing: Pipeline(...) below raised NameError
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn import impute

# Recreate preprocessor for CV
# NOTE: columns whose dtype is none of float64/int64/object/category are not
# listed here and are therefore dropped by ColumnTransformer's default
# remainder='drop'.
num_cols = X.select_dtypes(include=['float64','int64']).columns.tolist()
cat_cols = X.select_dtypes(include=['object','category']).columns.tolist()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline([('imp', impute.SimpleImputer(strategy='median')), ('sc', StandardScaler())]), num_cols),
        ('cat', Pipeline([('imp', impute.SimpleImputer(strategy='most_frequent')), ('ohe', OneHotEncoder(handle_unknown='ignore'))]), cat_cols)
    ])

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
metrics = {}
for name, model in models.items():
    # If the stored object is itself a Pipeline, evaluate only its final estimator
    # behind the freshly built preprocessor; otherwise use the object as-is.
    estimator = model.steps[-1][1] if hasattr(model, 'steps') else model
    aucs = []
    for train_idx, test_idx in skf.split(X, y):
        X_tr, X_te = X.iloc[train_idx], X.iloc[test_idx]
        y_tr, y_te = y.iloc[train_idx], y.iloc[test_idx]
        # clone() yields unfitted copies with identical hyper-parameters. Fitting
        # the loaded estimator directly would overwrite its trained state in
        # place, leaving the objects in `models` refit on the last CV fold.
        pipe = Pipeline([('pre', clone(preprocessor)), ('model', clone(estimator))])
        pipe.fit(X_tr, y_tr)
        # Named fold_prob (not y_prob) so it cannot be mistaken for the
        # full-data probabilities computed in later cells.
        fold_prob = pipe.predict_proba(X_te)[:,1]
        aucs.append(roc_auc_score(y_te, fold_prob))
    metrics[name] = {'cv_auc_mean': np.mean(aucs), 'cv_auc_std': np.std(aucs)}
cv_df = pd.DataFrame(metrics).T.sort_values('cv_auc_mean', ascending=False)
cv_df


In [None]:

# Section 4: ROC & PR Curves for Best Model
# The top row of cv_df identifies the model to plot; its stored (already fitted)
# object scores the full feature matrix X.
# NOTE(review): X presumably contains rows the model was trained on, so these
# curves may be optimistic — confirm against the training notebook's split.
best_model_name = cv_df.index[0]
best_model = models[best_model_name]
y_prob = best_model.predict_proba(X)[:, 1]  # probability of the positive class

fpr, tpr, _ = roc_curve(y, y_prob)
precision, recall, _ = precision_recall_curve(y, y_prob)

# One figure per curve: (x values, y values, x label, y label, title).
curve_specs = (
    (fpr, tpr, 'FPR', 'TPR', f'ROC Curve ({best_model_name})'),
    (recall, precision, 'Recall', 'Precision', f'PR Curve ({best_model_name})'),
)
for x_vals, y_vals, x_label, y_label, curve_title in curve_specs:
    plt.figure()
    plt.plot(x_vals, y_vals)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(curve_title)
    plt.show()


In [None]:

# Section 5: Calibration & Threshold Analysis
# Part 1 plots a reliability diagram for y_prob; part 2 sweeps decision
# thresholds and picks the operating point for the recall target.
from sklearn.calibration import calibration_curve
prob_true, prob_pred = calibration_curve(y, y_prob, n_bins=10)
plt.figure()
plt.plot(prob_pred, prob_true, marker='o')
plt.plot([0,1],[0,1],'--')  # diagonal = perfectly calibrated reference
plt.xlabel('Predicted'); plt.ylabel('True'); plt.title('Calibration Curve')
plt.show()

# Threshold for 90% recall
TARGET_RECALL = 0.90
thresholds = np.linspace(0,1,101)
recall_vals = []
precision_vals = []
for t in thresholds:
    preds = (y_prob >= t).astype(int)
    tp = ((preds==1)&(y==1)).sum()
    fp = ((preds==1)&(y==0)).sum()
    fn = ((preds==0)&(y==1)).sum()
    # Guard both ratios against an empty denominator (no positives / no predictions).
    recall_vals.append(tp/(tp+fn) if (tp+fn)>0 else 0)
    precision_vals.append(tp/(tp+fp) if (tp+fp)>0 else 0)

# Recall can only fall as the threshold rises, and it is 1.0 at t=0. Taking the
# FIRST threshold that meets the target (np.argmax on the boolean mask) therefore
# always selected t=0, i.e. "predict everything positive". The useful operating
# point is the LAST (highest) threshold that still reaches the target recall.
meets_target = np.flatnonzero(np.array(recall_vals) >= TARGET_RECALL)
if meets_target.size == 0:
    raise ValueError(
        f"No threshold reaches recall >= {TARGET_RECALL}; check that y contains positive (1) labels."
    )
idx = meets_target[-1]
chosen_t = thresholds[idx]
print("Chosen threshold:", chosen_t, "Recall:", recall_vals[idx], "Precision:", precision_vals[idx])


In [None]:

# Section 6: Model Selection & Discussion
# Place the cross-validated AUC statistics next to the 'acc' and 'auc' entries
# recorded in base_results, one row per model, ranked by mean CV AUC.
baseline_df = pd.DataFrame(base_results).T   # transpose: one row per top-level key
baseline_cols = baseline_df[['acc', 'auc']]
summary = pd.concat([cv_df, baseline_cols], axis=1)
summary.sort_values(by='cv_auc_mean', ascending=False)


In [None]:

# Section 7: Deployment Considerations
# Static checklist of monitoring / rollout recommendations, emitted as plain text.
deployment_notes = """Deployment Notes:
- Track input data drift with statistical tests (e.g., KS test) on feature distributions.
- Monitor performance metrics (AUC, PR AUC) on a rolling window.
- Use shadow deployment before full rollout of retrained models.
"""
print(deployment_notes)
