# 06_run_experiments_and_summarise.ipynb

**Objective:**  
1. Remount Drive & set paths  
2. Discover all saved `(arch, pool, pca)` model files in `models/`  
3. For each combo, load the model and corresponding `yale_test` features, compute test accuracy  
4. Aggregate results into a leaderboard DataFrame  
5. Save `metrics/leaderboard.csv` and display the top performers  


In [5]:
# Cell Tag: imports
import joblib
import numpy as np
import pandas as pd
import re


In [6]:
# Cell Tag: parameters
import os
from pathlib import Path

from google.colab import drive

# Mount Google Drive at /content/drive; force_remount=True requests a fresh
# mount even when one already exists from an earlier run of this cell.
drive.mount('/content/drive', force_remount=True)

# Project root on Drive; every other directory is derived from it.
ROOT = Path("/content/drive/My Drive/Colab Notebooks/CPSC 381-581: Machine Learning/Final Project")

# Inputs: saved model pickles and extracted feature archives.
MODEL_DIR = ROOT.joinpath("models")
FEATURE_DIR = ROOT.joinpath("features")

# Output: metrics directory (created on demand) and the leaderboard file in it.
METRICS_DIR = ROOT.joinpath("metrics")
LEADERBOARD_CSV = METRICS_DIR.joinpath("leaderboard.csv")
METRICS_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong mount or path is visible immediately.
for _label, _location in (
    ("Model dir:", MODEL_DIR),
    ("Feature dir:", FEATURE_DIR),
    ("Metrics dir:", METRICS_DIR),
):
    print(_label, _location)


Mounted at /content/drive
Model dir: /content/drive/My Drive/Colab Notebooks/CPSC 381-581: Machine Learning/Final Project/models
Feature dir: /content/drive/My Drive/Colab Notebooks/CPSC 381-581: Machine Learning/Final Project/features
Metrics dir: /content/drive/My Drive/Colab Notebooks/CPSC 381-581: Machine Learning/Final Project/metrics


In [7]:
# Cell Tag: execute
# Score every saved model in MODEL_DIR against its matching Yale test features
# in FEATURE_DIR and collect one leaderboard row per evaluated model file.
results = []

# Expected filename layout: <arch>_<pool>_<pca>_<suffix>.pkl
# Only arch, pool and pca are captured. The trailing <suffix> segment is matched
# but not recorded, so two model files that differ only in that segment yield
# rows that are indistinguishable in the leaderboard.
pattern = re.compile(r"(.+?)_(gap|gmp)_(none|95var|128)_.+?\.pkl")

# sorted() keeps the evaluation order (and therefore the log) deterministic.
for model_path in sorted(MODEL_DIR.glob("*.pkl")):
    m = pattern.match(model_path.name)
    if not m:
        print("Skipping unrecognized model file:", model_path.name)
        continue

    arch, pool, pca = m.groups()
    feature_file = FEATURE_DIR / f"yale_test_{arch}_{pool}_{pca}.npz"
    if not feature_file.exists():
        print("Missing features for:", model_path.name)
        continue

    # load model & data
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code -- only point MODEL_DIR at model files you trust.
    model = joblib.load(model_path)
    # np.load on an .npz keeps the archive open; the context manager closes it
    # once both arrays have been read into memory.
    with np.load(feature_file) as data:
        X_test, y_test = data["X"], data["y"]

    # compute accuracy (whatever metric the loaded estimator's .score() reports)
    acc = model.score(X_test, y_test)
    results.append({
        "arch": arch,
        "pool": pool,
        "pca": pca,
        "accuracy": acc
    })
    print(f"Evaluated {arch}-{pool}-{pca}: acc={acc:.4f}")

# build DataFrame
# Explicit columns keep the schema stable even when nothing was evaluated;
# without them an empty `results` gives a column-less frame and the sort below
# raises KeyError on "accuracy".
df = pd.DataFrame(results, columns=["arch", "pool", "pca", "accuracy"])
df = df.sort_values("accuracy", ascending=False)


Evaluated resnet50-gap-95var: acc=0.5821


In [8]:
# Cell Tag: save-and-display
import IPython.display as disp

# Write the leaderboard without the index column, then report where it went.
df.to_csv(LEADERBOARD_CSV, index=False)
print("Saved leaderboard to", LEADERBOARD_CSV)

# Render the first ten rows of the leaderboard as a rich table.
disp.display(df.iloc[:10])


Saved leaderboard to /content/drive/My Drive/Colab Notebooks/CPSC 381-581: Machine Learning/Final Project/metrics/leaderboard.csv


Unnamed: 0,arch,pool,pca,accuracy
0,resnet50,gap,95var,0.58209
