In [1]:
# -------------------------------
# Multi-seed, multi-class evaluation with .npz export
# -------------------------------

import os
import numpy as np
import torch
import torch.nn as nn
from lib.cifar10_utils import (
    build_cifar_resnet, get_dataloaders, load_model, evaluate_model
)

# -------------------------------
# Configuration
# -------------------------------
# Evaluate on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

BATCH_SIZE = 512
NUM_CLASSES = 10

# All locations are relative to the current working directory.
MODEL_DIR = "./../models/cifar10/"       # trained checkpoints (.pth) are read from here
DATASET_DIR = "./../data/"               # handed to get_dataloaders as dataset_dir
RESULTS_DIR = "./../results/cifar10/"    # per-model .npz result files are written here
os.makedirs(RESULTS_DIR, exist_ok=True)  # create the output directory up front

# Seeds whose checkpoints are evaluated (example seeds).
SEEDS = [42, 602, 311, 637, 800, 543, 969, 122, 336, 93]

# Removed-class settings to evaluate: None = all classes, then one entry per
# class index (3 = "cat", 5 = "dog" etc.).
REMOVE_CLASSES = [None, *range(NUM_CLASSES)]

# Epoch count embedded in the checkpoint / result file names.
EPOCHS = 50

# -------------------------------
# Run evaluation and save results
# -------------------------------
# One loss module is enough for the whole sweep; it is only passed through
# to evaluate_model, so there is no need to rebuild it per iteration.
criterion = nn.CrossEntropyLoss()

for seed in SEEDS:
    for remove_class in REMOVE_CLASSES:
        print(f"\n=== Evaluating: seed={seed}, remove_class={remove_class} ===")

        # Checkpoint and result file share one stem:
        #   cifar_resnet_s<seed>_e<epochs>[_r<removed class>]
        model_name = f"cifar_resnet_s{seed}_e{EPOCHS}"
        if remove_class is not None:
            model_name += f"_r{remove_class}"
        model_path = os.path.join(MODEL_DIR, model_name + ".pth")
        result_path = os.path.join(RESULTS_DIR, model_name + ".npz")

        # Skip finished runs *before* doing any expensive work, so resuming
        # an interrupted sweep does not rebuild the dataset for every
        # combination that is already on disk.
        if os.path.exists(result_path):
            print(f"Already exists: {result_path}")
            continue

        # Prepare test data (the training loader is not needed here).
        _, testloader = get_dataloaders(
            batch_size=BATCH_SIZE,
            remove_class=remove_class,
            dataset_dir=DATASET_DIR
        )

        # Load and evaluate model
        model = build_cifar_resnet(num_classes=NUM_CLASSES, device=DEVICE)
        load_model(model, model_path, device=DEVICE)

        print(f"Evaluating model: {model_path}")
        preds, targets, confs = evaluate_model(
            model, testloader, criterion, device=DEVICE
        )

        # Write to a temporary file and rename it into place. If the run is
        # interrupted mid-write, no truncated file is left at result_path
        # for the "already exists" check above to mistake for a finished
        # evaluation. The temporary name keeps the ".npz" suffix because
        # np.savez_compressed appends it to names that lack it.
        tmp_path = os.path.join(RESULTS_DIR, model_name + ".tmp.npz")
        try:
            np.savez_compressed(
                tmp_path,
                preds=preds,
                targets=targets,
                confs=confs
            )
            os.replace(tmp_path, result_path)
        except BaseException:
            # Also covers KeyboardInterrupt: drop the partial file, then
            # let the original exception propagate unchanged.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise
        print(f"Saved: {result_path}")



=== Evaluating: seed=42, remove_class=None ===
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50.pth
Evaluating model: ./../models/cifar10/cifar_resnet_s42_e50.pth
Test Loss: 0.273 | Test Acc: 92.57%
Saved: ./../results/cifar10/cifar_resnet_s42_e50.npz

=== Evaluating: seed=42, remove_class=0 ===
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50_r0.pth
Evaluating model: ./../models/cifar10/cifar_resnet_s42_e50_r0.pth
Test Loss: 1.409 | Test Acc: 83.10%
Saved: ./../results/cifar10/cifar_resnet_s42_e50_r0.npz

=== Evaluating: seed=42, remove_class=1 ===


KeyboardInterrupt: 