# CIFAR-10: Training and Testing on a Clean Dataset & Adversarial Analysis

## Imports and CIFAR-10 loading

In [None]:
# Imports all the module paths
import sys

sys.path.append("../")

# Loads the rest of the modules
import torch
import torch.nn as nn

# File containing all the required training methods
import defenses.cifar10 as cifar10

# For testing
import utils.clean_test as clean_test

# Contains the data loaders
import utils.dataloaders as dataloaders

# For printing outcomes
import utils.printing as printing

# Example printing, but I removed it to simplify results
# for epsilon in epsilons:
#     printing.print_attack(
#         model,
#         testSetLoader,
#         "FGSM",
#         attacks["FGSM"],
#         epsilon=epsilon,
#     )

## Load the dataset

In [None]:
DATA_ROOT = "../datasets/CIFAR10"

trainSetLoader, _, testSetLoader = dataloaders.get_CIFAR10_data_loaders(
    DATA_ROOT,
    trainSetSize=50000,
    validationSetSize=0,
    batchSize=128,
)

## Note: the functions also provide a way to have shorter training times. However, this provides worse results. See my thesis for why.

## Standard Training

In [None]:
SAVE_LOAD_ROOT = "../models_data/CIFAR10"

standard_model = cifar10.standard_training(
    trainSetLoader,
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_standard",
)

In [None]:
# Test the model
clean_test.test_trained_model(standard_model, testSetLoader)

In [None]:
# Save the model
torch.save(standard_model, SAVE_LOAD_ROOT + "/cifar10_standard")

## Attacks and Their Results

In [None]:
# A possible attacks array (for nice printing):
import utils.attacking as attacking

attacks = {}

### FGSM (Fast Gradient Sign Method) https://arxiv.org/abs/1412.6572

The formula that is used for producing the adversarial example in this case is:

$x' = x + \epsilon * sign(\nabla_{x}J(\theta, x, y))$, where J is the loss function used.

In [None]:
import attacks.fgsm as fgsm

attacks["FGSM"] = fgsm.fgsm_attack

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        standard_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Print some images to evidence adversarial examples
import matplotlib.pyplot as plt
import numpy as np
import torchvision

def print_image(image, title, plot):
    """Draw an image tensor on a matplotlib-style axes and give it a title.

    Args:
        image: 3-D tensor whose axes are reordered with ``(1, 2, 0)`` before
            plotting (presumably channels-first ``(C, H, W)`` becoming
            ``(H, W, C)`` -- confirm against callers).
        title: Text passed to ``plot.set_title``.
        plot: Object exposing ``imshow`` and ``set_title`` (e.g. a matplotlib
            ``Axes``).
    """
    # detach() and cpu() make this safe for tensors that track gradients or
    # live on the GPU; a bare .numpy() raises for both.
    numpy_image = image.detach().cpu().numpy()
    plot.imshow(np.transpose(numpy_image, (1, 2, 0)))
    plot.set_title(title)

def print_attackus(
    model,
    testSetLoader,
    attack_name,
    attack_function,
    number_of_images=1,
    **kwargs,
):
    """Plot correctly classified test images next to their perturbed versions.

    Random samples are drawn from ``testSetLoader.dataset`` until
    ``number_of_images`` of them are classified correctly by ``model``. Each
    such image is perturbed with ``attack_function`` and drawn on its own row:
    the original on the left and the perturbed image on the right, each titled
    with the class the model predicts for it.

    Note that a row is drawn for every correctly classified sample, whether or
    not the perturbation changed the prediction. The loop only ends once
    enough correctly classified samples have been found.

    Args:
        model: Callable network mapping an image batch to logits.
        testSetLoader: Loader whose ``dataset`` supports ``len()``, integer
            indexing returning ``(image, label)`` and a ``classes`` sequence.
        attack_name: Name used in the figure title when no epsilon is given.
        attack_function: Called as ``attack_function(image, label, model,
            loss_function, epsilon=..., alpha=..., scale=True,
            iterations=...)``, or as ``attack_function(image, label)`` when
            ``library=True``.
        number_of_images: Number of rows (image pairs) to draw.
        **kwargs: Optional ``epsilon``, ``alpha``, ``iterations`` (forwarded to
            the attack, ``None`` when absent) and ``library`` (default
            ``False``).
    """
    # Network parameters
    loss_function = nn.CrossEntropyLoss()

    # Optional attack settings; a missing key is forwarded as None
    epsilon = kwargs.get("epsilon")
    alpha = kwargs.get("alpha")
    iterations = kwargs.get("iterations")

    # Library attacks are called with (image, label) only
    from_library = kwargs.get("library", False)

    # Use the device the model lives on rather than a notebook-level `device`
    # global that may not be defined when this function is called.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cpu")

    # Each row shows the original image and its perturbed counterpart
    number_columns = 2

    # squeeze=False keeps `axarr` two-dimensional even for a single row, so
    # rows can always be addressed as axarr[row, column].
    figure, axarr = plt.subplots(
        number_of_images,
        number_columns,
        figsize=(2 * number_columns, 2.5 * number_of_images),
        squeeze=False,
    )
    figure.subplots_adjust(right=1, hspace=1)

    if epsilon is not None:
        figure.suptitle("Original Image (left) and Adversarial Example (right)")
    else:
        figure.suptitle(f"{attack_name} Attack")

    dataset = testSetLoader.dataset
    class_names = dataset.classes

    # Select the images and show the perturbation
    images_shown = 0
    while images_shown < number_of_images:
        # Get random image index
        index = np.random.randint(0, len(dataset))

        # Turn the sample into a batch of one on the model's device
        image, label = dataset[index]
        image = image[None, :].to(device)
        label = torch.as_tensor((label,)).to(device)

        # Predict; no gradients are needed for a plain forward pass
        with torch.no_grad():
            _, pred = torch.max(model(image), 1)

        # Skip images the model already gets wrong
        if pred != label:
            continue

        # Perturb the image using the attack
        if not from_library:
            perturbed_image = attack_function(
                image,
                label,
                model,
                loss_function,
                epsilon=epsilon,
                alpha=alpha,
                scale=True,
                iterations=iterations,
            )
        else:
            perturbed_image = attack_function(image, label)

        # Classify the perturbed image
        with torch.no_grad():
            _, adversarial_pred = torch.max(model(perturbed_image), 1)

        clean_class = class_names[pred.item()]
        adversarial_class = class_names[adversarial_pred.item()]

        # Print the original image
        print_image(
            torchvision.utils.make_grid(image.detach().cpu(), normalize=True),
            f"Predicted {clean_class}",
            axarr[images_shown, 0],
        )

        # Print the perturbed image
        print_image(
            torchvision.utils.make_grid(
                perturbed_image.detach().cpu(), normalize=True
            ),
            f"Predicted {adversarial_class}",
            axarr[images_shown, 1],
        )

        # One more row filled
        images_shown += 1

In [None]:
# Define the `device` PyTorch will be running on, please hope it is CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

# Show an original test image next to its FGSM adversarial example
print_attackus(
    standard_model,
    testSetLoader,
    "FGSM",
    attacks["FGSM"],
    epsilon=0.01,
)

### I-FGSM / BIM (Iterative FGSM / Basic Iterative Method) https://arxiv.org/abs/1607.02533

The formula that is used for producing the adversarial example in this case is:

$x'_{0} = x$
$x'_{k + 1} = Clip_{x, \epsilon} \{x'_{k} + \alpha * sign(\nabla_{x}J(\theta, x'_{k}, y))\}$, where J is the loss function used. Note that in the paper proposing this method, $\alpha = 1$ is used, so pixels are changed by 1 in each iteration.

The formula for the clip function is also given in the paper.

In [None]:
import attacks.ifgsm as ifgsm

attacks["I-FGSM"] = ifgsm.ifgsm_attack

In [None]:
# Several values for epsilon. It's interesting to see how clamping affects results
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        standard_model,
        testSetLoader,
        "I-FGSM",
        attacks["I-FGSM"],
        epsilon=epsilon,
        alpha=1,
        iterations=3,
    )

### PGD (Projected Gradient Descent) https://arxiv.org/pdf/1706.06083.pdf

The formula that is used for producing the adversarial example in this case is:

$x^{t+1} = \Pi_{x+S}(x^t+\alpha \, sgn(\nabla_x L(\theta, x, y)))$,
where $S$ is the set of allowed perturbations. Note that this is basically BIM with random initialisation.

We use the $L_{\infty}$ norm here, i.e. we use clamping. 

In [None]:
import attacks.pgd as pgd

attacks["PGD"] = pgd.pgd_attack

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        standard_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=20,
    )

## From here, use existing libraries to simplify usage


In [None]:
import torchattacks

### DeepFool https://arxiv.org/pdf/1511.04599.pdf

The formula here is complex, so I recommend that the interested reader follow the link above and read the full paper.

In [None]:
deepfool_attack = torchattacks.DeepFool(standard_model)

attacks["DeepFool"] = deepfool_attack

In [None]:
attacking.attack_model(
    standard_model,
    testSetLoader,
    "DeepFool",
    attacks["DeepFool"],
    library=True,
)

### CW-$L_{2}$ (Carlini & Wagner using $L_{2}$ norm) https://arxiv.org/pdf/1608.04644.pdf

The formula here is complex, so I recommend that the interested reader follow the link above and read the full paper.

In [None]:
cw_attack = torchattacks.CW(standard_model, c=1, steps=50)

attacks["CW"] = cw_attack

In [None]:
attacking.attack_model(
    standard_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

## Adversarial Training

### FGSM Adversarial Training

In [None]:
fgsm_model = cifar10.adversarial_training(
    trainSetLoader,
    "FGSM",
    attacks["FGSM"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_fgsm",
    epsilon=0.45,
)

In [None]:
clean_test.test_trained_model(fgsm_model, testSetLoader)

In [None]:
# Save the model
torch.save(fgsm_model, SAVE_LOAD_ROOT + "/cifar10_fgsm")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        fgsm_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        fgsm_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

### PGD Adversarial Training

In [None]:
pgd_model = cifar10.adversarial_training(
    trainSetLoader,
    "PGD",
    attacks["PGD"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_pgd",
    epsilon=(8 / 255),
    alpha=(2 / 255),
    iterations=7,
)

In [None]:
clean_test.test_trained_model(pgd_model, testSetLoader)

In [None]:
# Save the model
torch.save(pgd_model, SAVE_LOAD_ROOT + "/cifar10_pgd")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        pgd_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        pgd_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

In [None]:
deepfool_attack = torchattacks.DeepFool(pgd_model)

attacks["DeepFool"] = deepfool_attack

In [None]:
attacking.attack_model(
    pgd_model,
    testSetLoader,
    "DeepFool",
    attacks["DeepFool"],
    library=True,
)

In [None]:
cw_attack = torchattacks.CW(pgd_model, c=20)

attacks["CW"] = cw_attack

In [None]:
attacking.attack_model(
    pgd_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

## Interpolated Adversarial Training

In [None]:
interpolated_pgd_model = cifar10.interpolated_adversarial_training(
    trainSetLoader,
    "PGD",
    attacks["PGD"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_interpolated_pgd_reliable",
    epsilon=(7 / 255),
    alpha=(2 / 255),
    iterations=7,
    test=True,
    verbose=True,
    testSetLoader=testSetLoader,
)

In [None]:
clean_test.test_trained_model(interpolated_pgd_model, testSetLoader)

In [None]:
# Save the model
torch.save(
    interpolated_pgd_model, SAVE_LOAD_ROOT + "/cifar10_interpolated_pgd_reliable"
)

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        interpolated_pgd_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        interpolated_pgd_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

## Dual / Triple Adversarial Training

In [None]:
dual_model = cifar10.dual_adversarial_training(
    trainSetLoader,
    attacks["PGD"],
    attacks["FGSM"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_dual_stronger_fgsm",
    epsilon1=(8 / 255),
    epsilon2=0.1,
    alpha=(2 / 255),
    iterations=7,
)

In [None]:
clean_test.test_trained_model(dual_model, testSetLoader)

In [None]:
# Save the model
torch.save(dual_model, SAVE_LOAD_ROOT + "/cifar10_dual_stronger_fgsm")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        dual_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        dual_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

In [None]:
deepfool_attack = torchattacks.DeepFool(dual_model)

attacks["DeepFool"] = deepfool_attack

In [None]:
attacking.attack_model(
    dual_model,
    testSetLoader,
    "DeepFool",
    attacks["DeepFool"],
    library=True,
)

In [None]:
cw_attack = torchattacks.CW(dual_model, c=20)

attacks["CW"] = cw_attack

In [None]:
attacking.attack_model(
    dual_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

## Jacobian Regularization Training

In [None]:
jacobian_model = cifar10.jacobian_training(
    trainSetLoader,
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_jacobian",
)

In [None]:
clean_test.test_trained_model(jacobian_model, testSetLoader)

In [None]:
# Save the model
torch.save(jacobian_model, SAVE_LOAD_ROOT + "/cifar10_jacobian")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        jacobian_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        jacobian_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

In [None]:
deepfool_attack = torchattacks.DeepFool(jacobian_model)

attacks["DeepFool"] = deepfool_attack

In [None]:
attacking.attack_model(
    jacobian_model,
    testSetLoader,
    "DeepFool",
    attacks["DeepFool"],
    library=True,
)

In [None]:
cw_attack = torchattacks.CW(jacobian_model, c=20)

attacks["CW"] = cw_attack

In [None]:
attacking.attack_model(
    jacobian_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

## ALP Training

In [None]:
alp_model = cifar10.ALP_training(
    trainSetLoader,
    "PGD",
    attacks["PGD"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_alp",
    epsilon=0.45,
    alpha=(2 / 255),
    iterations=7,
)

In [None]:
clean_test.test_trained_model(alp_model, testSetLoader)

In [None]:
# Save the model
torch.save(alp_model, SAVE_LOAD_ROOT + "/cifar10_alp")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        alp_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        alp_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

In [None]:
deepfool_attack = torchattacks.DeepFool(alp_model)

attacks["DeepFool"] = deepfool_attack

In [None]:
attacking.attack_model(
    alp_model,
    testSetLoader,
    "DeepFool",
    attacks["DeepFool"],
    library=True,
)

In [None]:
cw_attack = torchattacks.CW(alp_model, c=20)

attacks["CW"] = cw_attack

In [None]:
attacking.attack_model(
    alp_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

# Comparing with other implementations

In [None]:
import models.resnet as resnet

# Define the `device` PyTorch will be running on, please hope it is CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Notebook will use PyTorch Device: " + device.upper())

## Comparison with Towards Deep Learning Models Resistant to Adversarial Attacks (https://arxiv.org/abs/1706.06083)

In [None]:
# NOTE(review): this freshly built ResNet18 is discarded on the next line,
# where the name is rebound to the object returned by torch.load.
towards_model = resnet.ResNet18()
# torch.load unpickles the file; this checkpoint sits under "external", so
# only load it if its origin is trusted.
towards_model = torch.load(SAVE_LOAD_ROOT + "/external/cifar10_towards")
towards_model.to(device)
towards_model.eval()

# Test the model
clean_test.test_trained_model(towards_model, testSetLoader)

# Save the model
# NOTE(review): this writes the just-loaded model back to the same path it
# was read from.
torch.save(towards_model, SAVE_LOAD_ROOT + "/external/cifar10_towards")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        towards_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        towards_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

## Jacobian + ALP Training

In [None]:
jacobian_alp_model = cifar10.jacobian_ALP_training(
    trainSetLoader,
    "PGD",
    attacks["PGD"],
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_jacobian_alp",
    epsilon=0.45,
    alpha=(2 / 255),
    iterations=7,
)

In [None]:
clean_test.test_trained_model(jacobian_alp_model, testSetLoader)

In [None]:
# Save the model
torch.save(jacobian_alp_model, SAVE_LOAD_ROOT + "/cifar10_jacobian_alp")

In [None]:
# Several values to use for the epsilons
epsilons = [0, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        jacobian_alp_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

In [None]:
# Several values to use for the epsilons
epsilons = [0, 4 / 255, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

In [None]:
for epsilon in epsilons:
    attacking.attack_model(
        jacobian_alp_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )