# 03 — Evaluate & Visualise

Load a saved model checkpoint, evaluate it on the validation split — **confusion matrix**, **classification report**, **top-1/top-5 accuracy**, **F1 (macro)**, and **mAP** — and visualise the **feature maps** of the first convolutional layer.

In [None]:
# %pip install torch torchvision torchaudio
# %pip install numpy pandas scikit-learn matplotlib seaborn tqdm

import os, numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
from pathlib import Path
from tqdm import tqdm
import torch, torch.nn as nn, torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import confusion_matrix, classification_report, top_k_accuracy_score, f1_score, average_precision_score

# --- Configuration --------------------------------------------------------
DATA_ROOT = Path("/path/to/Places2_simp")  # <-- EDIT
CHECKPOINT = Path("../checkpoints/best_resnet34.pth")
BATCH_VAL, NUM_WORKERS = 1024, 4
# Seed for the per-class validation split. Without a fixed seed every run
# draws a different 20% subset, so the split can never line up with the one
# used during training.
# NOTE(review): 42 is a placeholder — set it to the seed the training
# notebook used for its split, and confirm that notebook shuffles the same way.
SPLIT_SEED = 42

# Deterministic evaluation preprocessing: resize, centre-crop to 224x224,
# convert to a tensor, then normalise each channel with the mean/std below.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = datasets.ImageFolder(root=str(DATA_ROOT), transform=val_tf)
num_classes = len(dataset.classes)

# Use same 80/20 split protocol as training: shuffle the sample indices of
# each class and keep the first 20% of every class for validation.
# The labels come from the dataset that is already loaded, which avoids
# scanning the image folder a second time just to read them.
targets = np.array(dataset.targets)
# RandomState(seed) yields the same stream as np.random.seed(seed) followed by
# np.random.* calls, but leaves the global NumPy RNG state untouched.
split_rng = np.random.RandomState(SPLIT_SEED)
val_idx = []
groups = pd.Series(np.arange(len(targets))).groupby(targets).groups
for idxs in groups.values():
    idxs = np.array(list(idxs))
    split_rng.shuffle(idxs)
    n_val = int(0.2 * len(idxs))  # floors: classes with < 5 images contribute nothing
    val_idx.extend(idxs[:n_val].tolist())

val_ds = Subset(dataset, val_idx)
val_loader = DataLoader(
    val_ds,
    batch_size=BATCH_VAL,
    shuffle=False,  # keep sample order stable across passes over the loader
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),  # pinning only helps host-to-GPU copies
)

def build_resnet34(num_classes):
    """Return a ResNet-34 without pretrained weights and a custom classifier head.

    The network's original ``fc`` layer is replaced by ``Dropout(0.5)``
    followed by a ``Linear`` layer that maps the original ``fc`` input width
    to ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the new classifier head.

    Returns:
        The modified ``torchvision`` ResNet-34 module.
    """
    net = models.resnet34(weights=None)
    feature_dim = net.fc.in_features
    head = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(feature_dim, num_classes),
    )
    net.fc = head
    return net

# Evaluate on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = build_resnet34(num_classes).to(device)
# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered checkpoint file cannot execute arbitrary code while loading.
# map_location remaps stored tensors onto the device selected above.
state_dict = torch.load(CHECKPOINT, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
# Inference mode: disables dropout and uses the stored batch-norm statistics.
model.eval()

In [None]:
# Metrics
# Run the model over the whole validation loader, collecting the softmax
# probabilities and the ground-truth labels of every batch.
all_probs, all_true = [], []
with torch.no_grad():
    for x, y in val_loader:
        logits = model(x.to(device))
        # Use a separate per-batch name so the concatenated `probs` below is
        # not shadowed by a loop variable.
        batch_probs = torch.softmax(logits, dim=1).cpu().numpy()
        all_probs.append(batch_probs)
        all_true.append(y.numpy())
probs = np.concatenate(all_probs)
y_true = np.concatenate(all_true)
y_pred = probs.argmax(1)

class_ids = np.arange(num_classes)

top1 = (y_pred == y_true).mean()
top5 = top_k_accuracy_score(y_true, probs, k=5, labels=class_ids)
f1 = f1_score(y_true, y_pred, average="macro")
# One-hot encode the labels so average precision is computed one-vs-rest per
# class and then macro-averaged.
y_true_ovr = np.eye(num_classes)[y_true]
mAP = average_precision_score(y_true_ovr, probs, average="macro")
print(f"Top-1: {top1*100:.2f}% | Top-5: {top5*100:.2f}% | F1-macro: {f1:.3f} | mAP: {mAP:.3f}")

# Pass `labels` explicitly: without it the report infers the label set from
# y_true/y_pred, and a class absent from both no longer lines up with
# `target_names`. zero_division=0 reports 0 (without a warning) for classes
# that have no predictions or no samples.
report = classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=dataset.classes,
    zero_division=0,
)
# Only the first 1000 characters are printed to keep the cell output short.
print(report[:1000], '...')

In [None]:
# Confusion matrix
# Rows are true classes, columns are predicted classes; the label list is
# passed explicitly so the matrix is always num_classes x num_classes.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(num_classes))

fig, ax = plt.subplots(figsize=(11, 9))
sns.heatmap(cm, ax=ax, cmap="Blues", square=True, cbar=True)
ax.set_title("Confusion Matrix — Validation")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")

# Make sure the output folder exists before writing the figure to disk.
Path("../docs/images").mkdir(parents=True, exist_ok=True)
plt.tight_layout()
plt.savefig("../docs/images/confusion_val.png", dpi=200)
plt.show()

In [None]:
# Feature maps (first conv layer)
hook_out = {}


def hook_fn(m, i, o):
    """Forward hook: keep a detached CPU copy of the hooked module's output."""
    hook_out['fm'] = o.detach().cpu()


h = model.conv1.register_forward_hook(hook_fn)
try:
    # Build the small batch straight from the dataset. With shuffle=False
    # these are the same first images the loader would yield, without
    # decoding a whole BATCH_VAL-sized batch just to keep eight of them.
    n_imgs = min(8, len(val_ds))
    x = torch.stack([val_ds[k][0] for k in range(n_imgs)]).to(device)
    with torch.no_grad():
        _ = model(x)
finally:
    # Always detach the hook, even if the forward pass fails; otherwise it
    # stays registered on the model and fires on every later forward call.
    h.remove()

fm = hook_out['fm']
# Show at most eight channels of the first image in the batch.
N = min(8, fm.shape[1])
fig, axes = plt.subplots(1, N, figsize=(2*N, 2))
# plt.subplots returns a bare Axes (not an array) when N == 1.
axes = np.atleast_1d(axes)
for i in range(N):
    axes[i].imshow(fm[0, i].numpy(), cmap='magma')
    axes[i].axis('off')
plt.suptitle("Feature maps — conv1")
plt.tight_layout()
# Create the output folder here as well so this cell does not depend on an
# earlier cell having been run first.
Path("../docs/images").mkdir(parents=True, exist_ok=True)
plt.savefig("../docs/images/featuremaps_conv1.png", dpi=200)
plt.show()