In [None]:
!pip -q install -U robustbench
!pip -q install -U git+https://github.com/fra31/auto-attack

import os, time, math, random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt

from robustbench.utils import load_model
from autoattack import AutoAttack

def seed_all(seed=0, deterministic=False):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) random number generators.

    Args:
        seed: Integer seed handed to every generator.
        deterministic: When False (the default, matching the notebook's
            historical behaviour) cuDNN autotuning is enabled and deterministic
            kernels are not requested, which favours speed. When True,
            autotuning is switched off and deterministic cuDNN kernels are
            requested, trading speed for run-to-run repeatability on GPU.

    Returns:
        None. The function only mutates global RNG / backend state.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # The two cuDNN switches are kept mutually consistent: benchmarking picks
    # kernels by timing them, which defeats the purpose of asking for
    # deterministic ones.
    torch.backends.cudnn.deterministic = bool(deterministic)
    torch.backends.cudnn.benchmark = not deterministic

# Fix every RNG before any sampling or model work takes place.
seed_all(seed=0)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# `device` is a plain string because later cells compare it against "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)
if device != "cuda":
    print("⚠️ Tip: Colab → Runtime → Change runtime type → GPU (AutoAttack is much slower on CPU)")

In [None]:
# Per-channel CIFAR-10 statistics. Kept for reference only: they are
# deliberately NOT applied in the transform below.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std  = (0.2470, 0.2435, 0.2616)

# Images must stay in [0, 1]. RobustBench model-zoo networks take [0, 1]
# inputs, and AutoAttack interprets `eps` and clips its adversarial examples
# in that same [0, 1] pixel range. Standardising with mean/std here pushed
# pixels outside that range and collapsed the clean accuracy of the "Standard"
# model to roughly 30% on this subset.
transform = T.Compose([
    T.ToTensor(),
])

testset = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

# Evaluation budget: number of test images to attack, mini-batch size
# (smaller when running on CPU) and DataLoader worker processes.
N_EXAMPLES = 1000
BATCH_SIZE = 64 if device != "cuda" else 128
NUM_WORKERS = 2

# Draw a fixed, without-replacement sample of test indices from a dedicated
# RandomState so the subset does not depend on the global NumPy RNG.
subset_rng = np.random.RandomState(0)
idx = subset_rng.choice(len(testset), size=N_EXAMPLES, replace=False)
test_subset = Subset(testset, idx.tolist())

# shuffle=False keeps the batches in the sampled index order.
test_loader = DataLoader(
    test_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=(device == "cuda"),
)

@torch.no_grad()
def loader_to_tensors(loader, device):
    """Drain `loader` and return its contents as two tensors on `device`.

    Args:
        loader: Iterable yielding `(inputs, labels)` pairs of tensors.
        device: Target device for the concatenated tensors.

    Returns:
        `(x, y)`: all inputs and all labels, each concatenated along dim 0 in
        iteration order and then moved to `device`.
    """
    # Collect every batch first; concatenation happens before the device move.
    batches = [(inputs, labels) for inputs, labels in loader]
    all_inputs = torch.cat([pair[0] for pair in batches], dim=0)
    all_labels = torch.cat([pair[1] for pair in batches], dim=0)
    return all_inputs.to(device), all_labels.to(device)

# Materialise the whole evaluation subset once so every model and every
# epsilon is evaluated on exactly the same tensors.
x_test, y_test = loader_to_tensors(loader=test_loader, device=device)
print("x_test:", tuple(x_test.size()), "y_test:", tuple(y_test.size()))

In [None]:
# Each entry is a (model_name, dataset, threat_model) triple that is later
# forwarded to RobustBench's `load_model`. All entries share the CIFAR-10 /
# Linf setting, so only the model names are spelled out.
MODEL_SPECS = [
    (model_name, "cifar10", "Linf")
    for model_name in (
        "Standard",
        "Wong2020Fast",
        "Rebuffi2021Fixing_70_16_cutmix_extra",
    )
]

def try_load_rb_model(name, dataset, threat_model, device):
    """Best-effort loader for a single RobustBench model.

    Args:
        name: Model identifier passed to `load_model` as `model_name`.
        dataset: Dataset identifier passed to `load_model`.
        threat_model: Threat-model identifier passed to `load_model`.
        device: Device the loaded model is moved to.

    Returns:
        The model, moved to `device` and switched to eval mode, or `None` when
        anything in that sequence raises. The failure is reported on stdout
        instead of propagating, so one bad model does not abort the notebook.
    """
    try:
        net = load_model(model_name=name, dataset=dataset, threat_model=threat_model)
        net = net.to(device)
        net.eval()
    except Exception as e:
        print(f"❌ Failed to load {name} ({dataset}, {threat_model}). Reason:\n   {type(e).__name__}: {e}\n   -> Skipping.")
        return None
    else:
        return net

# Attempt every spec in order (lazily, one at a time) and keep only the models
# that actually loaded, paired with their names.
load_attempts = (
    (spec_name, try_load_rb_model(spec_name, spec_dataset, spec_threat, device))
    for spec_name, spec_dataset, spec_threat in MODEL_SPECS
)
models = [(spec_name, net) for spec_name, net in load_attempts if net is not None]

print("\nLoaded models:", [n for n,_ in models])
# Stop here if nothing loaded; the evaluation cells below need at least one model.
assert models, "No models loaded; check install/runtime."

@torch.no_grad()
def accuracy(model, x, y, batch_size=256):
    """Return the fraction of rows in `x` that `model` classifies as `y`.

    Args:
        model: Callable mapping a batch of inputs to per-class scores; the
            prediction is the argmax over dim 1.
        x: Input tensor; rows along dim 0 are individual examples.
        y: Label tensor aligned with `x` along dim 0.
        batch_size: Number of rows sent through `model` per call.

    Returns:
        Python float in [0, 1]: correct predictions divided by `x.shape[0]`.
    """
    total = x.shape[0]
    # Walk the data in contiguous chunks so the model never sees more than
    # `batch_size` rows at once; the final chunk may be shorter.
    hits = sum(
        (model(x[lo:lo + batch_size]).argmax(dim=1) == y[lo:lo + batch_size]).sum().item()
        for lo in range(0, total, batch_size)
    )
    return hits / total

In [None]:
# Linf budgets to sweep, written as k/255. The zero entry is not attacked: the
# evaluation loop records the clean accuracy for it instead.
EPS_LIST = [k / 255 for k in (0, 1, 2, 4, 8)]

# AutoAttack preset name forwarded as `version=`.
AA_VERSION = "standard"

# Optional override of the attack list; None leaves AutoAttack's own selection
# for the chosen version untouched.
ATTACKS_TO_RUN = None

def run_autoattack(model, x, y, eps, version="standard", attacks_to_run=None, seed=0):
    """Run AutoAttack's standard evaluation under an Linf budget of `eps`.

    Relies on the module-level `device` and `BATCH_SIZE` settings.

    Args:
        model: Network under attack.
        x: Inputs to perturb.
        y: Labels aligned with `x`.
        eps: Linf perturbation budget.
        version: AutoAttack version string.
        attacks_to_run: Optional replacement for the attacker's
            `attacks_to_run` attribute; `None` leaves it as constructed.
        seed: Value stored on the attacker's `seed` attribute.

    Returns:
        Whatever `run_standard_evaluation` returns (presumably the adversarial
        counterparts of `x` - confirm against the AutoAttack docs).
    """
    attacker = AutoAttack(model, norm="Linf", eps=eps, version=version, device=device)
    attacker.seed = seed
    # Only override the attack list when the caller asked for a specific one.
    if attacks_to_run is not None:
        attacker.attacks_to_run = attacks_to_run
    return attacker.run_standard_evaluation(x, y, bs=BATCH_SIZE)

# Maps model name -> list of (eps, clean_acc, robust_acc) tuples, one tuple per
# budget in EPS_LIST, in sweep order.
results = {}

for name, model in models:
    print("\n" + 80 * "=")
    print("Model:", name)

    # Clean accuracy is measured once per model and repeated in every tuple.
    clean_acc = accuracy(model, x_test, y_test, batch_size=BATCH_SIZE)
    print(f"Clean accuracy on subset: {clean_acc*100:.2f}%")

    curve = []
    for eps in EPS_LIST:
        if eps != 0:
            # Timing covers both the attack and the re-scoring of its output.
            t0 = time.time()
            x_adv = run_autoattack(
                model,
                x_test,
                y_test,
                eps=eps,
                version=AA_VERSION,
                attacks_to_run=ATTACKS_TO_RUN,
                seed=0,
            )
            rob_acc = accuracy(model, x_adv, y_test, batch_size=BATCH_SIZE)
            dt = time.time() - t0

            print(f"  eps={eps:.5f} ({eps*255:.1f}/255)  robust_acc={rob_acc*100:.2f}%   time={dt/60:.1f} min")
            curve.append((eps, clean_acc, rob_acc))

            # Drop the adversarial batch before the next budget to limit
            # peak memory; also release cached GPU blocks when on CUDA.
            del x_adv
            if device == "cuda":
                torch.cuda.empty_cache()
        else:
            # A zero budget needs no attack: robust accuracy equals clean accuracy.
            curve.append((eps, clean_acc, clean_acc))

    results[name] = curve

In [None]:
# One line per model: x is the budget rescaled to /255 units, y is the accuracy
# under attack in percent.
fig, ax = plt.subplots(figsize=(8, 5))
for name, curve in results.items():
    budgets_255 = [point[0] * 255 for point in curve]
    robust_pct = [point[2] * 100 for point in curve]
    ax.plot(budgets_255, robust_pct, marker="o", label=name)

ax.set_xlabel("L∞ perturbation budget ε (in /255)")
ax.set_ylabel("Accuracy under AutoAttack (%)")
ax.set_title(f"AutoAttack Robustness Curves (CIFAR-10 subset n={N_EXAMPLES}, AA={AA_VERSION})")
ax.grid(True, alpha=0.3)
ax.legend()
plt.show()

print("\nSummary (robust acc at 8/255):")
# Budgets are floats, so the 8/255 point is located with a tolerance rather
# than an exact comparison.
target_eps = 8 / 255
for name, curve in results.items():
    # First matching point wins; None signals that the sweep had no 8/255 entry.
    r8 = next((rob for eps, _, rob in curve if abs(eps - target_eps) < 1e-12), None)
    if r8 is None:
        print(f"  {name:40s}  (no 8/255 point)")
    else:
        print(f"  {name:40s}  {r8*100:6.2f}%")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.9/90.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.4/194.4 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.8/54.8 kB[0m [31m967.0 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for autoattack (setup.py) ... [?25l[?25hdone
Device: cpu
⚠️ Tip: Colab → Runtime → Change runtime type → GPU (AutoAttack is much slower on CPU)


100%|██████████| 170M/170M [00:02<00:00, 77.9MB/s]


x_test: (1000, 3, 32, 32) y_test: (1000,)
Downloading models/cifar10/Linf/Standard.pt (gdrive_id=1t98aEuzeTL8P7Kpd5DIrCoCL21BNZUhC).


Downloading...
From (original): https://drive.google.com/uc?id=1t98aEuzeTL8P7Kpd5DIrCoCL21BNZUhC
From (redirected): https://drive.google.com/uc?id=1t98aEuzeTL8P7Kpd5DIrCoCL21BNZUhC&confirm=t&uuid=5f38f5ca-1ccf-4a1c-bcb1-58d241b029f5
To: /content/models/cifar10/Linf/Standard.pt
100%|██████████| 292M/292M [00:02<00:00, 139MB/s]


Downloading models/cifar10/Linf/Wong2020Fast.pt (gdrive_id=1Re--_lf3jCEw9bnQqGkjw3J7v2tSZKrv).


Downloading...
From: https://drive.google.com/uc?id=1Re--_lf3jCEw9bnQqGkjw3J7v2tSZKrv
To: /content/models/cifar10/Linf/Wong2020Fast.pt
100%|██████████| 44.7M/44.7M [00:00<00:00, 108MB/s]


Downloading models/cifar10/Linf/Rebuffi2021Fixing_70_16_cutmix_extra.pt (gdrive_id=1qKDTp6IJ1BUXZaRtbYuo_t0tuDl_4mLg).


Downloading...
From (original): https://drive.google.com/uc?id=1qKDTp6IJ1BUXZaRtbYuo_t0tuDl_4mLg
From (redirected): https://drive.google.com/uc?id=1qKDTp6IJ1BUXZaRtbYuo_t0tuDl_4mLg&confirm=t&uuid=ac5aca05-95d9-46d1-a500-997e620f1ec4
To: /content/models/cifar10/Linf/Rebuffi2021Fixing_70_16_cutmix_extra.pt
100%|██████████| 1.07G/1.07G [00:04<00:00, 238MB/s]



Loaded models: ['Standard', 'Wong2020Fast', 'Rebuffi2021Fixing_70_16_cutmix_extra']

Model: Standard
Clean accuracy on subset: 30.40%
setting parameters for standard version
using standard version including apgd-ce, apgd-t, fab-t, square.
initial accuracy: 30.40%
apgd-ce - 1/5 - 22 out of 64 successfully perturbed
apgd-ce - 2/5 - 24 out of 64 successfully perturbed
apgd-ce - 3/5 - 27 out of 64 successfully perturbed
apgd-ce - 4/5 - 32 out of 64 successfully perturbed
apgd-ce - 5/5 - 24 out of 48 successfully perturbed
robust accuracy after APGD-CE: 17.50% (total time 12038.3 s)
