# CIFAR-10: Framework Implementation and Adversarial Robustness Analysis

## Imports and CIFAR-10 loading

In [1]:
# Imports all the module paths
import sys
sys.path.append("../../")

# Requirements for running everything
import torch

# File containing all the required training methods
import defences.cifar10_library as cifar10_library

# For testing
import utils.clean_test as clean_test

# Contains the data loadders
import utils.dataloaders as dataloaders

# For printing outcomes
# import utils.printing as printing

# Example printing, but I removed it to simplify results
# for epsilon in epsilons:
#     printing.print_attack(
#         model,
#         testSetLoader,
#         "FGSM",
#         attacks["FGSM"],
#         epsilon=epsilon,
#     )

Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA


## Load the dataset

In [2]:
DATA_ROOT = "../../datasets/CIFAR10"

trainSetLoader, _, testSetLoader = dataloaders.get_CIFAR10_data_loaders(
    DATA_ROOT,
    trainSetSize=50000,
    validationSetSize=0,
    batchSize=128,
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


## Save path

In [3]:
SAVE_LOAD_ROOT = "../../data/cifar10"

## Load the Attacks For Training

In [4]:
# A possible attacks array (for nice printing):
# Some attacks use a helper library
import torchattacks

import attacks.fgsm as fgsm
import attacks.ifgsm as ifgsm
import attacks.pgd as pgd
import utils.attacking as attacking

attacks = {}

attacks["FGSM"] = fgsm.fgsm_attack
attacks["I-FGSM"] = ifgsm.ifgsm_attack
attacks["PGD"] = pgd.pgd_attack

## Training Phase: Jacobian Regularizared PG+$CW_2$ Adversarially Trained Model (i.e. 2-Attack Adversarial Training)

In [5]:
framework_model = cifar10_library.framework_training(
    trainSetLoader,
    attack_function1=attacks["PGD"],
    attack_function2=None,
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/cifar10_framework",
    epsilon1=(8 / 255),
    alpha=(2 / 255),
    iterations=7,
    steps=15,
    c=0.15,
)

Found already trained model...
... loaded!


In [6]:
# Test the model
clean_test.test_trained_model(framework_model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 86.03%


In [7]:
# Save the model
torch.save(framework_model, SAVE_LOAD_ROOT + "/cifar10_framework")

### Evaluation

#### FGSM

In [8]:
# Several values to use for the epsilons
epsilons = [0, 0.01, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

# Run test for each epsilon
for epsilon in epsilons:
    attacking.attack_model(
        framework_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

Testing the model under FGSM Attack using epsilon = 0, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 86.03%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.01, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 74.4%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.05, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 36.6%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.1, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 22.54%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.2, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 20.49%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.35, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 20.63%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.55, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 19.13%
------------------------------------

Testing the model under FGSM Attack using epsilon = 0.75, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 18.59%
------------------------------------

Testing the model under FGSM Attack using epsilon = 1, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 18.37%
------------------------------------



#### PGD

In [9]:
# Several values to use for the epsilons
epsilons = [0, 0.01, 0.05, 0.1, 0.2, 0.35, 0.55, 0.75, 1]

for epsilon in epsilons:
    attacking.attack_model(
        framework_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

Testing the model under PGD Attack using epsilon = 0, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 86.03%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.01, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 73.28%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.05, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 33.78%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.1, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 24.27%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.2, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 11.75%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.35, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 2.61%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.55, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.43%
------------------------------------

Testing the model under PGD Attack using epsilon = 0.75, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.27%
------------------------------------

Testing the model under PGD Attack using epsilon = 1, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.16%
------------------------------------



#### $CW_2$

In [10]:
step_nums = [15, 30, 50, 100]
cs = [0.05, 0.1, 0.3, 1]

for c in cs:
    for step_num in step_nums:
        cw_attack = torchattacks.CW(framework_model, c=c, steps=step_num)
        attacks["CW"] = cw_attack

        attacking.attack_model(
        framework_model,
        testSetLoader,
        "CW",
        attacks["CW"],
        library=True,
        )

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 74.77%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 70.59%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 70.06%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 69.97%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 58.14%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 54.65%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 53.16%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 52.14%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 36.57%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 24.23%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 19.44%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 16.48%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 26.3%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 11.05%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 6.51%
------------------------------------

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 4.09%
------------------------------------



## Detection Phase: PCA-based Detection

In [11]:
import numpy as np
import torch.nn as nn
from sklearn.decomposition import PCA
from tqdm.notebook import tnrange, tqdm

# Define the `device` PyTorch will be running on, please hope it is CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Notebook will use PyTorch Device: " + device.upper())

loss_function = nn.CrossEntropyLoss()

# Copy the CIFAR-10 data and then fit using PCA
# First convert to numpy arrays (and make it float)
numpyTrainingData = trainSetLoader.dataset.data.astype("float32")
# Note you also need to reshape the input data for your sanity
reshapedNumpyTrainingData = numpyTrainingData.reshape(
    (len(numpyTrainingData), 32 * 32 * 3)
)

# Then perform PCA on training data to get principal components
# Note it should reflect dimension of image, i.e. 28 * 28
pca = PCA(n_components=32 * 32 * 3).fit(reshapedNumpyTrainingData)

Notebook will use PyTorch Device: CUDA


#### Benign Data

In [12]:
# Now on clean data check if there are any adversarial samples
numpyTestData = testSetLoader.dataset.data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Original prediction...
Done
24


#### FGSM Data

In [13]:
# Now do the same on adversarial data check if there are any adversarial samples
# Use a pretty progress bar to show updates
data = []

for j, (images, labels) in enumerate(
    tqdm(testSetLoader, desc="Testing Progress", leave=False)
):
    # Cast to proper tensor
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = fgsm.fgsm_attack(
        images,
        labels,
        framework_model,
        loss_function,
        epsilon=0.75,
        alpha=None,
        scale=True,
        iterations=None,
    )

    for perturbed_image in perturbed_images:
        data.append(perturbed_image.detach().cpu().numpy())

data = np.asarray(data)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
9793


#### PGD Data

In [14]:
# Now do the same on adversarial data check if there are any adversarial samples
# Use a pretty progress bar to show updates
data = []

for j, (images, labels) in enumerate(
    tqdm(testSetLoader, desc="Testing Progress", leave=False)
):
    # Cast to proper tensor
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = pgd.pgd_attack(
        images,
        labels,
        framework_model,
        loss_function,
        epsilon=0.75,
        alpha=(2 / 255),
        iterations=7,
        scale=True,
    )

    for perturbed_image in perturbed_images:
        data.append(perturbed_image.detach().cpu().numpy())

data = np.asarray(data)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
9997


In [15]:
# Now do the same on adversarial data check if there are any adversarial samples
# Use a pretty progress bar to show updates
data = []

for j, (images, labels) in enumerate(
    tqdm(testSetLoader, desc="Testing Progress", leave=False)
):
    # Cast to proper tensor
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = pgd.pgd_attack(
        images,
        labels,
        framework_model,
        loss_function,
        epsilon=0.001,
        alpha=(2 / 255),
        iterations=7,
        scale=True,
    )

    for perturbed_image in perturbed_images:
        data.append(perturbed_image.detach().cpu().numpy())

data = np.asarray(data)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
9040


In [16]:
# Now do the same on adversarial data check if there are any adversarial samples
# Use a pretty progress bar to show updates
data = []

for j, (images, labels) in enumerate(
    tqdm(testSetLoader, desc="Testing Progress", leave=False)
):
    # Cast to proper tensor
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = pgd.pgd_attack(
        images,
        labels,
        framework_model,
        loss_function,
        epsilon=0.85,
        alpha=(2 / 255),
        iterations=20,
        scale=True,
    )

    for perturbed_image in perturbed_images:
        data.append(perturbed_image.detach().cpu().numpy())

data = np.asarray(data)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
10000


#### $CW_{2}$ Data

In [17]:
cw_attack = torchattacks.CW(framework_model, c=1, steps=100)

# Now do the same on adversarial data check if there are any adversarial samples
# Use a pretty progress bar to show updates
data = []

for j, (images, labels) in enumerate(
    tqdm(testSetLoader, desc="Testing Progress", leave=False)
):
    # Cast to proper tensor
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = cw_attack(
        images,
        labels,
    )

    for perturbed_image in perturbed_images:
        data.append(perturbed_image.detach().cpu().numpy())

data = np.asarray(data)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Original predictions on data
predictions_base = np.zeros((len(numpyTestData), ))

print("Original prediction...")
for index in range(len(numpyTestData)):
    testTensor = torch.from_numpy(np.reshape(numpyTestData[index], (1, 3, 32, 32))).to(
        device
    )

    with torch.no_grad():
        logits = framework_model(testTensor).detach().cpu().numpy()

    predictions_base[index] = np.argmax(logits)
print("Done")

# Transform clean data along principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# How many trials to run
num_trials = 25

# Track results
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    random_noise = np.random.standard_normal(size=num_components)

    # Copy the data
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Update the components with the right data
    for index in range(len(numpyTestData)):
        transformedTestDataNoisy[index][(32 * 32 * 3 - num_components) :] += (
            10 * random_noise
        )

    # Now calculate the inverse using PCA and the noise
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into image
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Modified predictions on data
    predictions_modified = np.zeros((len(numpyTestData), ))

    for index in range(len(testDataNoisy)):
        testTensor = torch.from_numpy(
            np.reshape(testDataNoisy[index], (1, 3, 32, 32))
        ).to(device)

        with torch.no_grad():
            logits = framework_model(testTensor).detach().cpu().numpy()

        predictions_modified[index] = np.argmax(logits)

    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Printing
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
9396


## Detection Phase: Mahalanobis-based Detection

Please see in the **defences/mahalanobis_detector/** folder how to run the Mahalanobis-based Detection component, as it has a separate procedure.