# 06 – Model Evaluation

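In this notebook we compare the performance of the saved individual and ensemble models on the test set using several classification metrics (accuracy, F1, precision, recall and ROC AUC). We then plot the ROC curves of the models and visualise the confusion matrix of the model with the highest AUC.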

In [None]:
import os
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve

# Load test data
# All paths are resolved relative to the parent of the current working
# directory, so the kernel must be started from the notebook's own folder.
processed_dir = os.path.join(os.path.pardir, 'data', 'processed')
X_test = pd.read_csv(os.path.join(processed_dir, 'X_test.csv'))
# squeeze('columns') always turns a one-column file into a Series; a bare
# squeeze() would collapse a single-row file all the way down to a scalar.
y_test = pd.read_csv(os.path.join(processed_dir, 'y_test.csv')).squeeze('columns')

# Load preprocessor
# NOTE(review): the preprocessor is loaded but not applied anywhere in the
# visible code; presumably the saved models embed their own preprocessing
# (e.g. as pipelines) — confirm.
# joblib.load unpickles arbitrary objects: only load artefacts you trust.
preprocessor = joblib.load(os.path.join(processed_dir, 'preprocessor.pkl'))

# Load trained models
models_dir = os.path.join(os.path.pardir, 'models')
# os.listdir returns entries in arbitrary order; sorting keeps the evaluation
# order (and therefore tables and plot legends) reproducible across runs.
model_files = sorted(f for f in os.listdir(models_dir) if f.endswith('_model.pkl'))
if not model_files:
    # Fail early with a clear message instead of a cryptic error further down
    # when an empty results table is sorted by a column that does not exist.
    raise FileNotFoundError(f"No '*_model.pkl' files found in {models_dir!r}")

# Evaluate every saved model on the test set.
# `results` collects one metrics record per model; `roc_curves` maps a model
# name to (fpr, tpr, auc) for every model that exposes a continuous score.
results = []
roc_curves = {}

for file in model_files:
    # Strip only the trailing suffix so the name maps back to exactly one file
    # (str.replace would also remove the marker from the middle of a name).
    name = file[:-len('_model.pkl')]
    # joblib.load unpickles arbitrary objects: only load model files you trust.
    model = joblib.load(os.path.join(models_dir, file))
    # Predictions and scores
    # NOTE(review): the raw X_test frame is passed straight to the model; this
    # assumes each saved model handles its own preprocessing — confirm.
    preds = model.predict(X_test)

    # Continuous scores for ROC/AUC: prefer positive-class probabilities and
    # fall back to decision_function values, which roc_auc_score and roc_curve
    # accept as well. Without the fallback such models would report AUC = NaN.
    if hasattr(model, 'predict_proba'):
        # Column 1 is taken as the positive class (binary classification assumed).
        scores = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(X_test)
    else:
        scores = None

    if scores is not None:
        auc = roc_auc_score(y_test, scores)
        fpr, tpr, _ = roc_curve(y_test, scores)
        roc_curves[name] = (fpr, tpr, auc)
    else:
        # No continuous score available: keep the model in the table, but
        # without an AUC value or a ROC curve.
        auc = float('nan')

    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    precision = precision_score(y_test, preds)
    recall = recall_score(y_test, preds)
    results.append({'Model': name, 'Accuracy': acc, 'F1': f1, 'Precision': precision, 'Recall': recall, 'AUC': auc})

# Rank the models: one row per model, highest AUC at the top.
# Rows whose AUC is NaN are placed after all scored models.
metrics_table = pd.DataFrame(results)
results_df = metrics_table.sort_values(by='AUC', ascending=False, na_position='last')
print(results_df)

# Draw one ROC curve per entry in `roc_curves` on a shared set of axes
fig, ax = plt.subplots(figsize=(8, 6))
for name, (fpr, tpr, auc) in roc_curves.items():
    ax.plot(fpr, tpr, label=f'{name} (AUC={auc:.3f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves')
ax.legend()
plt.show()

# Confusion matrix for the best model
# results_df is sorted by AUC in descending order, so row 0 is the top-ranked model.
# NOTE(review): if no model produced an AUC, every value is NaN and row 0 is
# simply the first evaluated model — confirm that this is acceptable.
best_model_name = results_df.iloc[0]['Model']
# The model is reloaded from disk by name. joblib.load unpickles arbitrary
# objects, so only load model files you trust.
best_model = joblib.load(os.path.join(models_dir, f'{best_model_name}_model.pkl'))
best_preds = best_model.predict(X_test)

# Fix the class order explicitly so the matrix rows/columns and the tick labels
# are guaranteed to agree. Without explicit tick labels the heatmap shows
# positional indices (0..n-1), which is misleading for any other class labels.
class_labels = sorted(set(y_test) | set(best_preds))
cm = confusion_matrix(y_test, best_preds, labels=class_labels)

fig, ax = plt.subplots(figsize=(4, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title(f'Confusion Matrix – {best_model_name}')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()
