In [2]:
# -------------------------------
# CIFAR-10 Multi-seed, multi-class evaluation with .npz export + timing
# -------------------------------

import os
import time
import numpy as np
import torch
import torch.nn as nn
from lib.cifar10_utils import (
    build_cifar_resnet, get_dataloaders, load_model, evaluate_model
)

# -------------------------------
# Configuration
# -------------------------------
import csv  # imported here, alongside the timing-log setup that first uses it

# --- Hardware / batching ---
# Use the GPU when torch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
BATCH_SIZE = 512

# --- Experiment grid ---
NUM_CLASSES = 10
EPOCHS = 50  # appears in the checkpoint file name as "_e50"
SEEDS = [42, 602, 311, 637, 800, 543, 969, 122, 336, 93]
# None selects the full model; each integer 0-9 is handed to the evaluation
# loop as `remove_class`.
REMOVE_CLASSES = [None, *range(10)]

# --- Locations (relative to the working directory) ---
MODEL_DIR = "./../models/cifar10/"
DATASET_DIR = "./../data/"
RESULTS_DIR = "./../results/cifar10/"
TIME_CSV = "./../analytics/CIFAR10/timer/cifar10_eval_times.csv"

# Make sure both output folders exist before anything is written to them.
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(os.path.dirname(TIME_CSV), exist_ok=True)

# Start a fresh timing log: mode "w" truncates any file left by an earlier
# run, and only the header row is written here.
with open(TIME_CSV, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["seed", "removed_class", "time_seconds"])

# -------------------------------
# Run evaluation and save results
# -------------------------------
# The loss module does not depend on the seed or the removed class, so it is
# built once and reused for every checkpoint instead of once per run.
criterion = nn.CrossEntropyLoss()

for seed in SEEDS:
    print(f"\n=== Seed {seed} ===")
    # time.perf_counter() is a monotonic, high-resolution clock meant for
    # measuring intervals; differences of time.time() can be distorted when
    # the system clock is adjusted while the loop is running.
    seed_start = time.perf_counter()

    for remove_class in REMOVE_CLASSES:
        # The timed span starts here, so `elapsed` covers data-loader
        # construction, model build + checkpoint load, the evaluation pass
        # and the .npz write -- not the forward passes alone, despite the
        # "Inference time" label printed below.
        start_time = time.perf_counter()
        print(f"Evaluating seed={seed}, remove_class={remove_class}...")

        # Prepare test data (the training loader is not needed here).
        _, testloader = get_dataloaders(
            batch_size=BATCH_SIZE,
            remove_class=remove_class,
            dataset_dir=DATASET_DIR
        )

        # Checkpoint naming: cifar_resnet_s<seed>_e<epochs>[_r<class>].pth,
        # where the "_r" suffix is present only when a class was removed.
        model_name = f"cifar_resnet_s{seed}_e{EPOCHS}"
        if remove_class is not None:
            model_name += f"_r{remove_class}"
        model_path = os.path.join(MODEL_DIR, model_name + ".pth")

        # The result archive shares the checkpoint's base name.
        result_path = os.path.join(RESULTS_DIR, model_name + ".npz")

        # NOTE(review): the skip-if-already-evaluated guard is disabled, so
        # every run re-evaluates and overwrites an existing .npz file.
        # if os.path.exists(result_path):
        #     print(f"Already exists: {result_path}")
        #     continue

        # Load and evaluate model
        model = build_cifar_resnet(num_classes=NUM_CLASSES, device=DEVICE)
        load_model(model, model_path, device=DEVICE)

        preds, targets, confs = evaluate_model(
            model, testloader, criterion, device=DEVICE
        )

        # Save the three returned arrays under the keys preds/targets/confs.
        np.savez_compressed(result_path, preds=preds, targets=targets, confs=confs)

        elapsed = time.perf_counter() - start_time
        print(f"Saved: {result_path} | Inference time: {elapsed:.3f}s")

        # Append one row per run; the file is reopened each time so rows
        # already written are on disk if a later run fails.
        with open(TIME_CSV, mode="a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([seed, remove_class, elapsed])

    seed_elapsed = time.perf_counter() - seed_start
    print(f"✅ Completed seed {seed} in {seed_elapsed:.2f} s")

print("\nAll models evaluated and times saved successfully.")



=== Seed 42 ===
Evaluating seed=42, remove_class=None...
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50.pth
Test Loss: 0.273 | Test Acc: 92.57%
Saved: ./../results/cifar10/cifar_resnet_s42_e50.npz | Inference time: 3.868s
Evaluating seed=42, remove_class=0...
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50_r0.pth
Test Loss: 1.409 | Test Acc: 83.10%
Saved: ./../results/cifar10/cifar_resnet_s42_e50_r0.npz | Inference time: 2.935s
Evaluating seed=42, remove_class=1...
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50_r1.pth
Test Loss: 1.838 | Test Acc: 83.57%
Saved: ./../results/cifar10/cifar_resnet_s42_e50_r1.npz | Inference time: 3.142s
Evaluating seed=42, remove_class=2...
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50_r2.pth
Test Loss: 1.448 | Test Acc: 83.57%
Saved: ./../results/cifar10/cifar_resnet_s42_e50_r2.npz | Inference time: 3.201s
Evaluating seed=42, remove_class=3...
Model loaded from ./../models/cifar10/cifar_resnet_s42_e50_r3.pt