# Unified Model Results Aggregation

This notebook aggregates evaluation results from all trained models across previous notebooks into a single comparison table. No models are trained or modified in this notebook.

## Import Required Libraries

In [1]:
import os
import json
import pandas as pd
import numpy as np

## Results Directory Overview

All model evaluation outputs are stored in the `results/` directory. Each model has its own subfolder containing metrics.

In [2]:
RESULTS_DIR = "../results"

model_folders = [
    f for f in os.listdir(RESULTS_DIR)
    if os.path.isdir(os.path.join(RESULTS_DIR, f))
]

model_folders

['adaboost',
 'extra_trees',
 'gaussian_process',
 'gradient_boosting',
 'k_nearest_neighbors',
 'logistic_regression',
 'mlp_neural_network',
 'naive_bayes',
 'random_forest',
 'ridge_classifier',
 'svm_rbf']

## Metric Extraction Utility

This function extracts key evaluation metrics from each modelâ€™s results.

In [3]:
def extract_model_results(model_name):
    model_path = os.path.join(RESULTS_DIR, model_name)

    # Load classification report
    report_path = os.path.join(model_path, "classification_report.json")
    with open(report_path, "r") as f:
        report = json.load(f)

    # Extract macro-averaged metrics
    precision = report["macro avg"]["precision"]
    recall = report["macro avg"]["recall"]
    f1 = report["macro avg"]["f1-score"]
    accuracy = report["accuracy"]

    # Load ROC-AUC if available
    roc_auc_path = os.path.join(model_path, "roc_auc.txt")
    if os.path.exists(roc_auc_path):
        with open(roc_auc_path, "r") as f:
            roc_auc = float(f.read())
    else:
        roc_auc = np.nan

    return {
        "Model": model_name.replace("_", " ").title(),
        "Accuracy": round(accuracy, 3),
        "Precision (Macro)": round(precision, 3),
        "Recall (Macro)": round(recall, 3),
        "F1-score (Macro)": round(f1, 3),
        "ROC-AUC": round(roc_auc, 3) if not np.isnan(roc_auc) else "N/A"
    }

## Aggregate Results from All Models

In [4]:
results = []

for model in model_folders:
    try:
        results.append(extract_model_results(model))
    except Exception as e:
        print(f"Skipping {model}: {e}")

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Accuracy,Precision (Macro),Recall (Macro),F1-score (Macro),ROC-AUC
0,Adaboost,0.899,0.866,0.866,0.865,0.956
1,Extra Trees,0.913,0.884,0.884,0.884,0.979
2,Gaussian Process,0.892,0.857,0.856,0.857,0.972
3,Gradient Boosting,0.913,0.884,0.883,0.884,0.979
4,K Nearest Neighbors,0.615,0.539,0.552,0.543,0.747
5,Logistic Regression,0.899,0.865,0.869,0.867,0.972
6,Mlp Neural Network,0.514,0.451,0.49,0.43,0.673
7,Naive Bayes,0.319,0.451,0.396,0.262,0.716
8,Random Forest,0.912,0.883,0.881,0.882,0.978
9,Ridge Classifier,0.871,0.832,0.832,0.829,


## Sort Models by ROC-AUC

In [6]:
# Convert ROC-AUC column to numeric (coerce non-numeric values to NaN)
results_df["ROC-AUC"] = pd.to_numeric(
    results_df["ROC-AUC"],
    errors="coerce"
)

In [7]:
results_df_sorted = results_df.sort_values(
    by="ROC-AUC",
    ascending=False,
    na_position="last"
).reset_index(drop=True)

results_df_sorted

Unnamed: 0,Model,Accuracy,Precision (Macro),Recall (Macro),F1-score (Macro),ROC-AUC
0,Extra Trees,0.913,0.884,0.884,0.884,0.979
1,Gradient Boosting,0.913,0.884,0.883,0.884,0.979
2,Random Forest,0.912,0.883,0.881,0.882,0.978
3,Logistic Regression,0.899,0.865,0.869,0.867,0.972
4,Gaussian Process,0.892,0.857,0.856,0.857,0.972
5,Adaboost,0.899,0.866,0.866,0.865,0.956
6,K Nearest Neighbors,0.615,0.539,0.552,0.543,0.747
7,Naive Bayes,0.319,0.451,0.396,0.262,0.716
8,Svm Rbf,0.24,0.08,0.333,0.129,0.704
9,Mlp Neural Network,0.514,0.451,0.49,0.43,0.673


## Save Unified Results Table

In [9]:
OUTPUT_PATH = "../reports/unified_model_results.csv"
os.makedirs("../reports", exist_ok=True)

results_df_sorted.to_csv(OUTPUT_PATH, index=False)

print(f"Unified results table saved to {OUTPUT_PATH}")

Unified results table saved to ../reports/unified_model_results.csv
