# 06 – Model Evaluation

This notebook compares the performance of the trained models using various metrics and visualises the results with confusion matrices and ROC curves.


In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, confusion_matrix, roc_curve
import joblib

# Resolve the project layout: the parent of the current working directory is
# treated as the project root, with models and processed data beneath it.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
models_dir = os.path.join(project_root, 'models')
processed_dir = os.path.join(project_root, 'data', 'processed')

# Load test data
x_test_path = os.path.join(processed_dir, 'X_test.csv')
y_test_path = os.path.join(processed_dir, 'y_test.csv')
X_test = pd.read_csv(x_test_path)
# squeeze() reduces a single-column frame to a Series
# (presumably y_test.csv holds only the label column -- confirm).
y_test = pd.read_csv(y_test_path).squeeze()

# Load models
def load_model(name):
    """Load the persisted model called *name* from the models directory.

    The file is looked up at ``<models_dir>/<name>_model.pkl``.

    Args:
        name: Model identifier used as the file-name prefix.

    Returns:
        The object that ``joblib.load`` deserialises from that file.

    Note:
        joblib files are pickle-based, so only load files from a trusted
        source.
    """
    filename = f"{name}_model.pkl"
    model_path = os.path.join(models_dir, filename)
    return joblib.load(model_path)

# Models to evaluate; each one is looked up as "<name>_model.pkl" in models_dir.
model_names = ['LogisticRegression', 'RandomForest', 'GradientBoosting', 'XGBoost', 'Voting', 'Stacking']

results = []   # one (name, accuracy, F1, AUC) tuple per evaluated model
roc_data = []  # one (name, fpr, tpr, AUC) tuple per evaluated model, for plotting

for name in model_names:
    # Every model, ensembles included, uses the same file-naming scheme, so
    # the load_model helper covers all of them without special-casing.
    try:
        model = load_model(name)
    except FileNotFoundError:
        # Best effort: a model that has not been trained/saved is left out of
        # the comparison, but say so instead of dropping it silently.
        print(f"Skipping {name}: no saved model found in {models_dir}")
        continue

    preds = model.predict(X_test)
    # Column 1 is the probability of the second class in model.classes_; this
    # assumes a binary classifier that exposes predict_proba.
    # NOTE(review): a model without predict_proba (presumably e.g. a
    # hard-voting ensemble) would fail here -- confirm how 'Voting' was trained.
    probas = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds)
    auc = roc_auc_score(y_test, probas)
    results.append((name, acc, f1, auc))

    # Keep the curve points so all models can be drawn on one ROC plot later.
    fpr, tpr, _ = roc_curve(y_test, probas)
    roc_data.append((name, fpr, tpr, auc))

# Tabulate the per-model metrics and print them ranked by AUC, best first.
metric_columns = ['Model', 'Accuracy', 'F1', 'AUC']
results_df = pd.DataFrame(results, columns=metric_columns)
print(results_df.sort_values('AUC', ascending=False))

# Draw every evaluated model's ROC curve on a single set of axes.
fig, ax = plt.subplots(figsize=(8, 6))
for name, fpr, tpr, auc in roc_data:
    ax.plot(fpr, tpr, label=f"{name} (AUC={auc:.3f})")
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves')
ax.legend()
plt.show()
