# CIFAR-10: Adversarial Training and Defenses

## Imports and CIFAR-10 loading

In [1]:
# For plotting
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.utils

# Nice loading bars
from tqdm.notebook import tnrange, tqdm

In [2]:
# Pick the PyTorch device for the whole notebook: CUDA when a GPU is visible,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Notebook will use PyTorch Device: " + device.upper())

Notebook will use PyTorch Device: CUDA


## Clean Testing

In [3]:
# Build the CIFAR-10 data loaders via the project helper. No validation split
# is requested (validationSetSize=0), so the middle return value (presumably
# the validation loader — verify against utils.dataloaders) is discarded.
import utils.dataloaders as dataloaders

DATA_ROOT = "./datasets/"

trainSetLoader, _, testSetLoader = dataloaders.get_CIFAR10_data_loaders(
    DATA_ROOT,
    trainSetSize=50000,
    validationSetSize=0,
    batchSize=64,
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Load the pretrained baseline model onto the device selected above, so a
# checkpoint saved on a GPU machine still loads when the notebook falls back
# to the CPU.
# NOTE: torch.load unpickles arbitrary Python objects; only load checkpoints
# from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled models; if this checkpoint is a whole nn.Module and is
# trusted, weights_only=False may be needed — confirm against the installed
# PyTorch version.
model = torch.load("./cifar10_model", map_location=device)

In [5]:
# Baseline: evaluate the freshly loaded model on the clean test set before any
# adversarial training, so later results have a reference accuracy.
import utils.clean_test as clean_test

clean_test.test_trained_model(model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/157 [00:00<?, ?it/s]

... done! Accuracy: 87.28%


## Adversarial Training

In [6]:
import attacks.fgsm as fgsm
import attacks.pgd as pgd
import utils.attacking as attacking

# For printing outcomes
import utils.printing as printing

# Registry mapping an attack's display name to the function implementing it.
# Note that this notebook-level dict shares its name with the `attacks` package.
attacks = {
    "FGSM": fgsm.fgsm_attack,
    "PGD": pgd.pgd_attack,
}

In [7]:
def get_adversarially_trained_model(
    model, attack, *, epochs=20, learning_rate=0.01, **kwargs
):
    """Train ``model`` in place on adversarial examples and return it.

    For every training batch an adversarial version of the images is generated
    with the selected attack, and the model is then optimised on that perturbed
    batch with SGD and cross-entropy loss.

    This function relies on the notebook-level globals ``attacks`` (name ->
    attack function), ``trainSetLoader`` (training batches) and ``device``.

    Args:
        model: Network to train. It is modified in place; the returned object
            is the very same instance, so pass a separate copy if the original
            weights must be preserved.
        attack: Key into the ``attacks`` registry, e.g. ``"FGSM"`` or ``"PGD"``.
        epochs: Number of passes over ``trainSetLoader`` (keyword-only).
        learning_rate: Learning rate of the SGD optimiser (keyword-only).
        **kwargs: Optional attack settings ``epsilon``, ``alpha`` and
            ``iterations``. Each one that is missing is forwarded to the attack
            as ``None``; any other key is ignored.

    Returns:
        The same ``model`` instance after training.

    Raises:
        KeyError: If ``attack`` is not a key of ``attacks``.
    """
    # Resolve the attack first so an unknown name fails before any work is done
    attack_function = attacks[attack]

    # Optional attack settings; absent ones are passed on as None
    epsilon = kwargs.get("epsilon")
    alpha = kwargs.get("alpha")
    iterations = kwargs.get("iterations")

    # Network parameters
    loss_function = nn.CrossEntropyLoss()
    model.train()

    # Consider using ADAM here as another gradient descent algorithm
    optimizer = torch.optim.SGD(
        model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4
    )

    print("Training the model using adversarial examples...")

    # Use a pretty progress bar to show updates
    for _epoch in tnrange(epochs, desc="Adversarial Training Progress"):
        for images, labels in tqdm(trainSetLoader, desc="Batches"):
            # Cast to proper tensors
            images, labels = images.to(device), labels.to(device)

            # Adversarial examples should be generated while the model is
            # frozen in evaluation mode. This may not matter for very simple
            # linear models, but is a must for models using components such as
            # dropout or batch normalization.
            model.eval()
            perturbed_image = attack_function(
                images,
                labels,
                model,
                loss_function,
                epsilon=epsilon,
                alpha=alpha,
                scale=True,
                iterations=iterations,
            )
            model.train()

            # Clear any gradients accumulated so far (presumably including
            # those left behind by the attack) before the training step
            optimizer.zero_grad()
            logits = model(perturbed_image)

            loss = loss_function(logits, labels)
            loss.backward()
            optimizer.step()

    print("... done!")

    # Return the trained model
    return model

### FGSM Adversarial Training

In [8]:
# Load a fresh copy of the pretrained model for FGSM adversarial training and
# check its clean accuracy first (shortcutting many steps). map_location keeps
# the load working when the notebook runs on a different device than the one
# the checkpoint was saved from.
# NOTE: torch.load unpickles arbitrary Python objects; only load checkpoints
# from a trusted source.
fgsm_model = torch.load("./cifar10_model", map_location=device)
loss_function = nn.CrossEntropyLoss()

clean_test.test_trained_model(fgsm_model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/157 [00:00<?, ?it/s]

... done! Accuracy: 87.35%


In [9]:
# Train the dedicated FGSM copy. The training function modifies its argument in
# place, so passing the shared baseline `model` here would overwrite the
# baseline and discard the copy loaded for this section.
fgsm_model = get_adversarially_trained_model(fgsm_model, "FGSM", epsilon=0.45)

Training the model using adversarial examples...


Adversarial Training Progress:   0%|          | 0/20 [00:00<?, ?it/s]

Batches:   0%|          | 0/782 [00:00<?, ?it/s]

Batches:   0%|          | 0/782 [00:00<?, ?it/s]

Batches:   0%|          | 0/782 [00:00<?, ?it/s]

Batches:   0%|          | 0/782 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Epsilon values passed to the FGSM attack in the evaluation cells that follow
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run FGSM against the FGSM-trained model once per epsilon. print_attack is a
# project helper from utils.printing (presumably shows the outcome — verify).
for epsilon in epsilons:
    printing.print_attack(
        fgsm_model,
        testSetLoader,
        loss_function,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Attack the FGSM-trained model with FGSM over the test set, once per epsilon.
# attack_model is a project helper from utils.attacking (presumably reports
# accuracy under attack — verify).
for epsilon in epsilons:
    attacking.attack_model(
        fgsm_model,
        testSetLoader,
        loss_function,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Epsilon values passed to the PGD attack in the evaluation cells that follow
# (the FGSM sweep plus 4/255)
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run PGD (alpha 2/255, 20 iterations) against the FGSM-trained model once per
# epsilon. print_attack is a project helper from utils.printing (presumably
# shows the outcome — verify).
for epsilon in epsilons:
    printing.print_attack(
        fgsm_model,
        testSetLoader,
        loss_function,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=20,
    )

In [None]:
# Attack the FGSM-trained model with PGD (alpha 2/255, 20 iterations) over the
# test set, once per epsilon. attack_model is a project helper from
# utils.attacking (presumably reports accuracy under attack — verify).
for epsilon in epsilons:
    attacking.attack_model(
        fgsm_model,
        testSetLoader,
        loss_function,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=20,
    )

In [None]:
# Re-check the FGSM-trained model on the clean test set, so its accuracy after
# adversarial training can be compared with the baseline measured earlier.
clean_test.test_trained_model(fgsm_model, testSetLoader)

### PGD Adversarial Training

In [None]:
# Load a fresh copy of the pretrained model for PGD adversarial training and
# check its clean accuracy first (shortcutting many steps). map_location keeps
# the load working when the notebook runs on a different device than the one
# the checkpoint was saved from.
# NOTE: torch.load unpickles arbitrary Python objects; only load checkpoints
# from a trusted source.
pgd_model = torch.load("./cifar10_model", map_location=device)
loss_function = nn.CrossEntropyLoss()

# Evaluate the copy that was just loaded, not the FGSM-trained model
clean_test.test_trained_model(pgd_model, testSetLoader)

In [None]:
# Train the dedicated PGD copy. The training function modifies its argument in
# place, so passing the shared `model` here would continue training whatever
# state that object is already in instead of the copy loaded for this section.
pgd_model = get_adversarially_trained_model(
    pgd_model, "PGD", epsilon=0.45, alpha=(2 / 255), iterations=20
)

In [None]:
# Epsilon values passed to the FGSM attack in the evaluation cells that follow
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run FGSM against the PGD-trained model once per epsilon. print_attack is a
# project helper from utils.printing (presumably shows the outcome — verify).
for epsilon in epsilons:
    printing.print_attack(
        pgd_model,
        testSetLoader,
        loss_function,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Attack the PGD-trained model with FGSM over the test set, once per epsilon.
# attack_model is a project helper from utils.attacking (presumably reports
# accuracy under attack — verify).
for epsilon in epsilons:
    attacking.attack_model(
        pgd_model,
        testSetLoader,
        loss_function,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Epsilon values passed to the PGD attack in the evaluation cells that follow
# (the FGSM sweep plus 4/255)
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run PGD (alpha 2/255, 20 iterations) against the PGD-trained model once per
# epsilon. print_attack is a project helper from utils.printing (presumably
# shows the outcome — verify).
for epsilon in epsilons:
    printing.print_attack(
        pgd_model,
        testSetLoader,
        loss_function,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=20,
    )

In [None]:
# Attack the PGD-trained model with PGD (alpha 2/255, 20 iterations) over the
# test set, once per epsilon. attack_model is a project helper from
# utils.attacking (presumably reports accuracy under attack — verify).
for epsilon in epsilons:
    attacking.attack_model(
        pgd_model,
        testSetLoader,
        loss_function,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=20,
    )

In [None]:
# Re-check the PGD-trained model on the clean test set, so its accuracy after
# adversarial training can be compared with the baseline measured earlier.
clean_test.test_trained_model(pgd_model, testSetLoader)