# 03 - Defenses: Dropout & Preprocessing

**Goal**: Evaluate simple defense strategies against adversarial attacks.

**Defenses studied**:
1. **Dropout** - regularization that may improve robustness
2. **Gaussian noise preprocessing** - input randomization defense

In [None]:
# Colab setup: make the project's `src` package importable both on Colab and locally.
import sys
import os

if 'google.colab' in sys.modules:
    # Running inside Colab: clone the repository under /content and work from it.
    %cd /content
    # stderr is discarded and `|| true` ignores a failed clone, so re-running
    # this cell (when the directory already exists) does not abort it.
    !git clone https://github.com/cdm34/adversarial-robustness.git 2>/dev/null || true
    %cd adversarial-robustness
    sys.path.insert(0, '/content/adversarial-robustness')
else:
    # Local run: put the parent of the current working directory on sys.path
    # (presumably the repo root when started from a notebooks/ folder - confirm).
    sys.path.insert(0, os.path.abspath('..'))

In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np

from src import (
    FashionMNISTNet, FashionMNISTNetDropout,
    DataConfig, get_fashion_mnist_datasets, split_train_val, make_loaders,
    TrainConfig, fit,
    AttackConfig, fgsm, pgd_linf,
    accuracy, add_gaussian_noise,
    get_device, set_seed,
    plot_robustness_comparison, save_figure,
    FASHION_MNIST_CLASSES,
)

print(f"PyTorch version: {torch.__version__}")

## 1. Setup

In [None]:
# Seed first, before any data splitting or model construction happens below.
set_seed(42)
device = get_device()
print(f"Using device: {device}")

# Load data
train_ds, test_ds = get_fashion_mnist_datasets()
data_cfg = DataConfig(batch_size=128, val_ratio=0.1)
# Hold out a validation subset of the training data. The split is seeded with
# data_cfg.seed, which is DataConfig's default because no seed is passed above.
train_subset, val_subset = split_train_val(train_ds, data_cfg.val_ratio, data_cfg.seed)
# The three loaders are reused by every training and evaluation cell below.
train_loader, val_loader, test_loader = make_loaders(
    train_subset, val_subset, test_ds, data_cfg, device
)

## 2. Train Models

In [None]:
# Training hyperparameters shared by every fit() call in this notebook.
train_cfg = TrainConfig(epochs=10, lr=1e-3)

In [None]:
# Load the baseline CNN from its checkpoint when one exists; otherwise train it
# and persist the weights (same checkpoint layout as the dropout model below)
# so later runs reuse them instead of retraining from scratch.
baseline_model = FashionMNISTNet().to(device)

if os.path.exists('checkpoints/baseline_cnn.pt'):
    # map_location remaps the stored tensors onto the device used in this session.
    checkpoint = torch.load('checkpoints/baseline_cnn.pt', map_location=device)
    baseline_model.load_state_dict(checkpoint['model_state_dict'])
    print("Loaded baseline model")
else:
    print("Training baseline model...")
    result = fit(baseline_model, train_loader, val_loader, device, train_cfg)
    # Only written when no checkpoint existed, so an existing file is never overwritten.
    os.makedirs('checkpoints', exist_ok=True)
    torch.save({'model_state_dict': baseline_model.state_dict()}, 'checkpoints/baseline_cnn.pt')
    print(f"Best val acc: {result['best_val_acc']:.2f}%")

In [None]:
# Dropout variant (p=0.3): train and save it when no checkpoint is present,
# otherwise restore the saved weights.
dropout_model = FashionMNISTNetDropout(p=0.3).to(device)

if not os.path.exists('checkpoints/dropout_cnn.pt'):
    print("Training dropout model (p=0.3)...")
    result = fit(dropout_model, train_loader, val_loader, device, train_cfg)
    os.makedirs('checkpoints', exist_ok=True)
    torch.save(
        {'model_state_dict': dropout_model.state_dict()},
        'checkpoints/dropout_cnn.pt',
    )
    print(f"Best val acc: {result['best_val_acc']:.2f}%")
else:
    checkpoint = torch.load('checkpoints/dropout_cnn.pt', map_location=device)
    dropout_model.load_state_dict(checkpoint['model_state_dict'])
    print("Loaded dropout model")

## 3. Evaluate Clean Accuracy

In [None]:
# Clean (unattacked) test accuracy of both models, baseline first.
baseline_clean, dropout_clean = (
    accuracy(net, test_loader, device)
    for net in (baseline_model, dropout_model)
)

print(
    "Clean Test Accuracy:",
    f"  Baseline:  {baseline_clean:.2f}%",
    f"  Dropout:   {dropout_clean:.2f}%",
    sep="\n",
)

## 4. Evaluate Robustness

In [None]:
def evaluate_robust_accuracy(model, loader, attack_fn, attack_cfg, device):
    """Compute accuracy (in percent) on adversarial examples.

    Args:
        model: Classifier under attack. It is switched to eval mode and left
            in that mode.
        loader: Iterable of ``(x, y)`` batches.
        attack_fn: Callable invoked as ``attack_fn(model, x, y, attack_cfg)``;
            its return value is fed to the model as the adversarial batch.
        attack_cfg: Configuration object forwarded unchanged to ``attack_fn``.
        device: Device the batches are moved to before the attack runs.

    Returns:
        Percentage (0-100) of samples whose adversarial version is still
        classified correctly.

    Raises:
        ValueError: If ``loader`` yields no samples, since the accuracy would
            be undefined.
    """
    model.eval()
    correct = 0
    total = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)
        # The attack is called with autograd enabled; only the final
        # prediction below runs under no_grad.
        x_adv = attack_fn(model, x, y, attack_cfg)

        with torch.no_grad():
            preds = model(x_adv).argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")
    return 100.0 * correct / total

In [None]:
# FGSM: attack both models with the same perturbation budget `eps`.
eps = 0.1
fgsm_cfg = AttackConfig(eps=eps)

baseline_fgsm, dropout_fgsm = (
    evaluate_robust_accuracy(net, test_loader, fgsm, fgsm_cfg, device)
    for net in (baseline_model, dropout_model)
)

print(
    f"FGSM Robustness (ε={eps}):",
    f"  Baseline:  {baseline_fgsm:.2f}%",
    f"  Dropout:   {dropout_fgsm:.2f}%",
    sep="\n",
)

In [None]:
# PGD: same budget `eps` as the FGSM run, 10 steps of size eps/4.
pgd_cfg = AttackConfig(eps=eps, steps=10, step_size=eps/4)

baseline_pgd, dropout_pgd = (
    evaluate_robust_accuracy(net, test_loader, pgd_linf, pgd_cfg, device)
    for net in (baseline_model, dropout_model)
)

print(
    f"PGD Robustness (ε={eps}):",
    f"  Baseline:  {baseline_pgd:.2f}%",
    f"  Dropout:   {dropout_pgd:.2f}%",
    sep="\n",
)

## 5. Gaussian Noise Defense

In [None]:
def evaluate_with_noise_defense(model, loader, attack_fn, attack_cfg, noise_sigma, device):
    """Evaluate accuracy (in percent) with Gaussian noise preprocessing as defense.

    The adversarial batch is crafted against the model as-is, without the
    noise step; noise is added to the adversarial input only afterwards,
    right before classification.

    Args:
        model: Classifier to evaluate. It is switched to eval mode and left
            in that mode.
        loader: Iterable of ``(x, y)`` batches.
        attack_fn: Callable invoked as ``attack_fn(model, x, y, attack_cfg)``;
            its return value is treated as the adversarial batch.
        attack_cfg: Configuration object forwarded unchanged to ``attack_fn``.
        noise_sigma: Passed to ``add_gaussian_noise`` as ``sigma``.
        device: Device the batches are moved to before the attack runs.

    Returns:
        Percentage (0-100) of samples classified correctly after attack and
        noise preprocessing.

    Raises:
        ValueError: If ``loader`` yields no samples, since the accuracy would
            be undefined.
    """
    model.eval()
    correct = 0
    total = 0

    for x, y in loader:
        x, y = x.to(device), y.to(device)

        # Generate adversarial on original input (attack does not see the noise)
        x_adv = attack_fn(model, x, y, attack_cfg)

        # Apply noise preprocessing
        x_defended = add_gaussian_noise(x_adv, sigma=noise_sigma)

        with torch.no_grad():
            preds = model(x_defended).argmax(dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")
    return 100.0 * correct / total

In [None]:
# Test different noise levels
noise_sigmas = [0.0, 0.02, 0.05, 0.1, 0.15]

print(f"Gaussian Noise Defense vs FGSM (ε={eps}):")
print("-" * 50)

noise_results = []
for sigma in noise_sigmas:
    if sigma == 0.0:
        # sigma = 0 is the undefended case, already measured in the FGSM cell.
        acc = baseline_fgsm
    else:
        acc = evaluate_with_noise_defense(baseline_model, test_loader, fgsm, fgsm_cfg, sigma, device)
    noise_results.append(acc)
    print(f"  σ = {sigma:.2f}: {acc:.2f}%")


def _no_attack(model, x, y, attack_cfg):
    """Identity "attack": hand the clean batch back untouched."""
    return x


# Also test on clean data to see trade-off. Reusing evaluate_with_noise_defense
# with a no-op attack keeps a single evaluation loop and guarantees the model
# is in eval mode for these measurements too.
print("\nClean accuracy with noise preprocessing:")
clean_noise_results = []
for sigma in noise_sigmas:
    if sigma == 0.0:
        # sigma = 0 is the plain clean accuracy measured earlier.
        acc = baseline_clean
    else:
        # No attack, just noise
        acc = evaluate_with_noise_defense(baseline_model, test_loader, _no_attack, None, sigma, device)
    clean_noise_results.append(acc)
    print(f"  σ = {sigma:.2f}: {acc:.2f}%")

## 6. Comparison Summary

In [None]:
# Summary table: one row per model, columns for clean / FGSM / PGD accuracy.
print(
    "\n" + "=" * 60,
    "DEFENSE COMPARISON SUMMARY",
    "=" * 60,
    f"{'Defense':<20} {'Clean Acc':>12} {'FGSM Acc':>12} {'PGD Acc':>12}",
    "-" * 60,
    f"{'Baseline':<20} {baseline_clean:>11.2f}% {baseline_fgsm:>11.2f}% {baseline_pgd:>11.2f}%",
    f"{'Dropout (p=0.3)':<20} {dropout_clean:>11.2f}% {dropout_fgsm:>11.2f}% {dropout_pgd:>11.2f}%",
    "=" * 60,
    sep="\n",
)

In [None]:
# Bar chart comparison: clean vs. FGSM accuracy for the two models.
# The lists are positionally aligned with model_names (baseline first).
fig = plot_robustness_comparison(
    model_names=['Baseline', 'Dropout (p=0.3)'],
    clean_accs=[baseline_clean, dropout_clean],
    robust_accs=[baseline_fgsm, dropout_fgsm],
    attack_name=f'FGSM (ε={eps})'
)
# Persist the figure under this name; location and format are decided by save_figure.
save_figure(fig, 'defense_comparison_fgsm')
plt.show()

In [None]:
# PGD comparison: clean vs. PGD accuracy for the two models.
# The lists are positionally aligned with model_names (baseline first).
fig = plot_robustness_comparison(
    model_names=['Baseline', 'Dropout (p=0.3)'],
    clean_accs=[baseline_clean, dropout_clean],
    robust_accs=[baseline_pgd, dropout_pgd],
    attack_name=f'PGD (ε={eps})'
)
# Persist the figure under this name; location and format are decided by save_figure.
save_figure(fig, 'defense_comparison_pgd')
plt.show()

## Summary

**Key Findings**:
1. **Dropout** provides only a marginal robustness improvement, not a significant one
2. **Gaussian noise preprocessing** can slightly improve robustness but degrades clean accuracy
3. These simple defenses are **not sufficient** on their own to make a model robust

**Trade-offs**:
- Clean accuracy vs. robustness is a key tension
- Input randomization has diminishing returns

**Next**: Adversarial training for principled robustness