# SVHN: Evaluation Section

## Imports and SVHN loading

In [1]:
# Imports all the module paths
import sys

import torch

sys.path.append("../../")

# Loads the rest of the modules

# File containing all the required training methods
import defences.cifar10 as defences

# For testing
import utils.clean_test as clean_test

# Contains the data loaders
import utils.dataloaders as dataloaders

# For printing outcomes
# import utils.printing as printing

# Example printing, but I removed it to simplify results
# for epsilon in epsilons:
#     printing.print_attack(
#         model,
#         testSetLoader,
#         "FGSM",
#         attacks["FGSM"],
#         epsilon=epsilon,
#     )

Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA
Notebook will use PyTorch Device: CUDA


## Load the dataset

In [2]:
# Root directory of the SVHN .mat files, relative to this notebook.
DATA_ROOT = "../../datasets/SVHN"

# Build the SVHN data loaders. Only the first (training) and third (test)
# return values are kept; the middle one is discarded — presumably the
# validation loader, which would be empty with validationSetSize=0 (confirm in
# utils.dataloaders).
# NOTE(review): trainSetSize=63257 is hard-coded; verify it matches the number
# of training samples the helper actually exposes.
trainSetLoader, _, testSetLoader = dataloaders.get_SVHN_data_loaders(
    DATA_ROOT,
    trainSetSize=63257,
    validationSetSize=0,
    batchSize=128,
)

Using downloaded and verified file: ../../datasets/SVHN/train_32x32.mat
Using downloaded and verified file: ../../datasets/SVHN/extra_32x32.mat
Using downloaded and verified file: ../../datasets/SVHN/train_32x32.mat
Using downloaded and verified file: ../../datasets/SVHN/test_32x32.mat


## Save path

In [3]:
# Directory prefix used below for both loading and saving the trained models.
SAVE_LOAD_ROOT = "../../models_data/SVHN"

## Load the Attacks For Training

In [4]:
# A possible attacks array (for nice printing):
# Some attacks use a helper library
import torchattacks

import attacks.fgsm as fgsm
import attacks.ifgsm as ifgsm
import attacks.pgd as pgd
import utils.attacking as attacking

# Registry mapping a display name to the in-repo attack function; later cells
# add library-based attacks (e.g. "CW") to the same dict.
attacks = {
    "FGSM": fgsm.fgsm_attack,
    "I-FGSM": ifgsm.ifgsm_attack,
    "PGD": pgd.pgd_attack,
}

## Baseline performance on standard model

In [5]:
# Obtain the baseline (standard-training) model. With load_if_available=True
# the recorded run reported an already trained model at load_path and loaded it.
# long_training=False: semantics live in defences.standard_training —
# presumably selects the shorter training schedule; confirm there.
standard_model = defences.standard_training(
    trainSetLoader,
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/svhn_standard",
    long_training=False,
)

Found already trained model...
... loaded!


In [6]:
# Evaluate the baseline model on the unperturbed test loader; the helper
# prints the accuracy (95.09% in the recorded run).
clean_test.test_trained_model(standard_model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 95.09%


In [7]:
# Save the whole model object (not just a state_dict) to the same path it is
# loaded from above, so this overwrites the existing file when the model was
# loaded rather than freshly trained.
torch.save(standard_model, SAVE_LOAD_ROOT + "/svhn_standard")

## Training Phase of Framework

In [8]:
# Obtain the framework-trained model, using PGD as the first attack and no
# second attack. With load_if_available=True the recorded run loaded a saved
# model from load_path instead of training.
# epsilon1 / alpha / iterations are the PGD budget (8/255), step size (2/255)
# and iteration count (7) passed to the training routine.
# NOTE(review): `steps` and `c` are framework-specific hyper-parameters whose
# meaning is defined in defences.framework_training — confirm there.
framework_trained_model = defences.framework_training(
    trainSetLoader,
    attack_function1=attacks["PGD"],
    attack_function2=None,
    load_if_available=True,
    load_path=SAVE_LOAD_ROOT + "/svhn_framework",
    epsilon1=(8 / 255),
    alpha=(2 / 255),
    iterations=7,
    steps=15,
    c=0.15,
)

Found already trained model...
... loaded!


In [9]:
# Evaluate the framework-trained model on the unperturbed test loader; the
# helper prints the accuracy (89.66% in the recorded run).
clean_test.test_trained_model(framework_trained_model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 89.66%


In [10]:
# Save the whole model object to the same path it is loaded from above, so
# this overwrites the existing file when the model was loaded, not trained.
torch.save(framework_trained_model, SAVE_LOAD_ROOT + "/svhn_framework")

In [11]:
# Perturbation budgets swept in the FGSM evaluation below, from 0 (no
# perturbation) up to 1.
epsilons = [
    0,
    0.01,
    0.05,
    0.1,
    0.2,
    0.35,
    0.55,
    0.75,
    1,
]

In [12]:
# Attack the framework-trained model with FGSM once per epsilon in the sweep;
# each call prints the resulting accuracy on the test loader.
# The third argument is the attack's display name, the fourth the attack
# function taken from the `attacks` registry.
for epsilon in epsilons:
    attacking.attack_model(
        framework_trained_model,
        testSetLoader,
        "FGSM",
        attacks["FGSM"],
        epsilon=epsilon,
    )

Testing the model under FGSM Attack using epsilon = 0, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 89.66%


Testing the model under FGSM Attack using epsilon = 0.01, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 77.32%


Testing the model under FGSM Attack using epsilon = 0.05, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 45.26%


Testing the model under FGSM Attack using epsilon = 0.1, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 34.98%


Testing the model under FGSM Attack using epsilon = 0.2, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 34.04%


Testing the model under FGSM Attack using epsilon = 0.35, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 34.32%


Testing the model under FGSM Attack using epsilon = 0.55, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 36.06%


Testing the model under FGSM Attack using epsilon = 0.75, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 37.22%


Testing the model under FGSM Attack using epsilon = 1, alpha = None...


FGSM Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 37.48%




In [13]:
# Perturbation budgets swept in the PGD evaluation below. Same grid as the
# FGSM sweep plus 4 / 255; note this rebinds the notebook-level `epsilons`.
epsilons = [
    0,
    0.01,
    4 / 255,
    0.05,
    0.1,
    0.2,
    0.35,
    0.55,
    0.75,
    1,
]

In [14]:
# Attack the framework-trained model with PGD once per epsilon in the sweep,
# using the same step size (2/255) and iteration count (7) that were passed to
# framework_training above; each call prints the resulting accuracy.
for epsilon in epsilons:
    attacking.attack_model(
        framework_trained_model,
        testSetLoader,
        "PGD",
        attacks["PGD"],
        epsilon=epsilon,
        alpha=(2 / 255),
        iterations=7,
    )

Testing the model under PGD Attack using epsilon = 0, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 89.66%


Testing the model under PGD Attack using epsilon = 0.01, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 74.94%


Testing the model under PGD Attack using epsilon = 0.01568627450980392, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 64.44%


Testing the model under PGD Attack using epsilon = 0.05, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 36.73%


Testing the model under PGD Attack using epsilon = 0.1, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 29.11%


Testing the model under PGD Attack using epsilon = 0.2, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 17.08%


Testing the model under PGD Attack using epsilon = 0.35, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 5.57%


Testing the model under PGD Attack using epsilon = 0.55, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.84%


Testing the model under PGD Attack using epsilon = 0.75, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.1%


Testing the model under PGD Attack using epsilon = 1, alpha = 0.00784313725490196...


PGD Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 0.0%




In [15]:
# deepfool_attack = torchattacks.DeepFool(framework_trained_model, steps=10)
# attacks["DeepFool"] = deepfool_attack

In [16]:
# attacking.attack_model(
#     framework_trained_model,
#     testSetLoader,
#     "DeepFool",
#     attacks["DeepFool"],
#     library=True,
# )

In [17]:
# Build a torchattacks Carlini-Wagner attack bound to the framework-trained
# model (c=1, 50 optimisation steps) and register it under the "CW" key.
cw_attack = torchattacks.CW(framework_trained_model, c=1, steps=50)
attacks["CW"] = cw_attack

In [18]:
# Evaluate the framework-trained model on the test loader under the currently
# registered "CW" attack (c=1, steps=50).
# library=True presumably tells the helper that the attack is a torchattacks
# object rather than an in-repo attack function — confirm in utils.attacking.
attacking.attack_model(
    framework_trained_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 8.26%




In [19]:
# Rebuild the CW attack with a smaller constant (c=0.1, 50 steps); this
# overwrites the previous "CW" entry in the registry.
cw_attack = torchattacks.CW(framework_trained_model, c=0.1, steps=50)
attacks["CW"] = cw_attack

In [20]:
# Evaluate the framework-trained model under the currently registered "CW"
# attack (c=0.1, steps=50).
# library=True presumably marks the attack as a torchattacks object — confirm
# in utils.attacking.
attacking.attack_model(
    framework_trained_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 36.25%




In [21]:
# Rebuild the CW attack with fewer optimisation steps (c=0.1, 25 steps); this
# overwrites the previous "CW" entry in the registry.
cw_attack = torchattacks.CW(framework_trained_model, c=0.1, steps=25)
attacks["CW"] = cw_attack

In [22]:
# Evaluate the framework-trained model under the currently registered "CW"
# attack (c=0.1, steps=25).
# library=True presumably marks the attack as a torchattacks object — confirm
# in utils.attacking.
attacking.attack_model(
    framework_trained_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 40.72%




In [23]:
# Rebuild the CW attack with fewer optimisation steps (c=0.1, 15 steps); this
# overwrites the previous "CW" entry in the registry.
cw_attack = torchattacks.CW(framework_trained_model, c=0.1, steps=15)
attacks["CW"] = cw_attack

In [24]:
# Evaluate the framework-trained model under the currently registered "CW"
# attack (c=0.1, steps=15).
# library=True presumably marks the attack as a torchattacks object — confirm
# in utils.attacking.
attacking.attack_model(
    framework_trained_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 47.46%




In [25]:
# Rebuild the CW attack with the weakest setting tried (c=0.05, 10 steps);
# this overwrites the previous "CW" entry in the registry.
cw_attack = torchattacks.CW(framework_trained_model, c=0.05, steps=10)
attacks["CW"] = cw_attack

In [26]:
# Evaluate the framework-trained model under the currently registered "CW"
# attack (c=0.05, steps=10).
# library=True presumably marks the attack as a torchattacks object — confirm
# in utils.attacking.
attacking.attack_model(
    framework_trained_model,
    testSetLoader,
    "CW",
    attacks["CW"],
    library=True,
)

Testing the model under CW Attack...


CW Attack Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 79.75%




## PCA detection

In [27]:
import numpy as np
import torch.nn as nn
from sklearn.decomposition import PCA
from tqdm.notebook import tnrange, tqdm

# Define the `device` PyTorch will be running on: CUDA when available, else CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Notebook will use PyTorch Device: " + device.upper())

# Loss passed to the FGSM attack in the adversarial PCA cell further down
loss_function = nn.CrossEntropyLoss()

# Copy the SVHN training data and then fit using PCA
# First convert to numpy arrays (and make it float)
# NOTE(review): this reads the dataset's stored `.data` array directly, so any
# transform applied by the data loader is bypassed — confirm the value scale
# matches what the models are fed elsewhere.
numpyTrainingData = trainSetLoader.dataset.data.astype("float32")
# Flatten every image into one row of 32 * 32 * 3 = 3072 values
reshapedNumpyTrainingData = numpyTrainingData.reshape(
    (len(numpyTrainingData), 32 * 32 * 3)
)

# Then perform PCA on training data to get principal components
# Keep as many components as there are values per image, i.e. 32 * 32 * 3
pca = PCA(n_components=32 * 32 * 3).fit(reshapedNumpyTrainingData)

Notebook will use PyTorch Device: CUDA


In [32]:
# PCA sensitivity check on the *clean* test images: count how many samples
# change their predicted class (under framework_trained_model) in at least one
# trial when random noise is injected into the least significant PCA
# coefficients.
#
# NOTE(review): `dataset.data` is fed to the model exactly as stored (only cast
# to float32). If the data loader applies transforms (e.g. rescaling), the
# model sees differently scaled inputs here than in the loader-based tests —
# confirm against utils.dataloaders.
numpyTestData = testSetLoader.dataset.data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Reference predictions on the unmodified images (one sample per forward pass)
predictions_base = np.zeros((len(numpyTestData),))

print("Original prediction...")
with torch.no_grad():
    for index in range(len(numpyTestData)):
        testTensor = torch.from_numpy(
            np.reshape(numpyTestData[index], (1, 3, 32, 32))
        ).to(device)
        logits = framework_trained_model(testTensor).detach().cpu().numpy()
        predictions_base[index] = np.argmax(logits)
print("Done")

# Project the clean images onto the principal components
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 750

# Column of the first perturbed coefficient. Computed explicitly (instead of
# slicing with -num_components) so that num_components = 0 perturbs nothing.
first_noisy_component = 32 * 32 * 3 - num_components

# Multiplier applied to the standard-normal noise
noise_scale = 7

# How many trials to run
num_trials = 25

# Fixed-seed generator so a fresh "Restart & Run All" reproduces the count
noise_rng = np.random.default_rng(0)

# Per-sample flag: did the prediction change in at least one trial?
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    print("Trial {}".format(trial))
    # One noise vector per trial, shared by every sample
    random_noise = noise_rng.standard_normal(size=num_components)

    # Work on a copy so every trial starts from the clean projection
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Broadcast the noise vector over all rows in one vectorised add
    transformedTestDataNoisy[:, first_noisy_component:] += noise_scale * random_noise

    # Map the perturbed coefficients back to pixel space
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into images
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Predictions on the perturbed images
    predictions_modified = np.zeros((len(numpyTestData),))

    with torch.no_grad():
        for index in range(len(testDataNoisy)):
            testTensor = torch.from_numpy(
                np.reshape(testDataNoisy[index], (1, 3, 32, 32))
            ).to(device)
            logits = framework_trained_model(testTensor).detach().cpu().numpy()
            predictions_modified[index] = np.argmax(logits)

    # Accumulate samples whose label flipped in this trial
    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Number of samples whose prediction changed in at least one trial.
# NOTE(review): this is a raw count over len(numpyTestData) samples; compare
# it with other runs as a fraction, not as an absolute number.
print(np.sum(result))

Original prediction...
Done
Trial 0
Trial 1
Trial 2
Trial 3
Trial 4
Trial 5
Trial 6
Trial 7
Trial 8
Trial 9
Trial 10
Trial 11
Trial 12
Trial 13
Trial 14
Trial 15
Trial 16
Trial 17
Trial 18
Trial 19
Trial 20
Trial 21
Trial 22
Trial 23
Trial 24
2270


In [30]:
# PCA sensitivity check on *adversarial* test images: craft FGSM examples,
# then count how many of them change their predicted class in at least one
# trial when random noise is injected into the least significant PCA
# coefficients.
#
# NOTE(review): the FGSM examples are crafted against framework_trained_model
# but are classified below with standard_model, whereas the clean-data cell
# classifies with framework_trained_model — confirm which model is intended
# before comparing the two counts.
# NOTE(review): `scale=True` is interpreted inside attacks.fgsm — confirm its
# meaning there.
adversarial_batches = []

# Use a pretty progress bar to show updates
for images, labels in tqdm(testSetLoader, desc="Testing Progress", leave=False):
    # Move the batch to the compute device
    images, labels = images.to(device), labels.to(device)

    # Perturb the images using the attack
    perturbed_images = fgsm.fgsm_attack(
        images,
        labels,
        framework_trained_model,
        loss_function,
        epsilon=0.75,
        alpha=None,
        scale=True,
        iterations=None,
    )

    # Keep the whole batch as one array instead of appending image by image
    adversarial_batches.append(perturbed_images.detach().cpu().numpy())

# Stack the batches into a single array with one adversarial image per row
data = np.concatenate(adversarial_batches, axis=0)
numpyTestData = data.astype("float32")
reshapedNumpyTestData = numpyTestData.reshape((len(numpyTestData), 32 * 32 * 3))

# Reference predictions on the (un-noised) adversarial images
predictions_base = np.zeros((len(numpyTestData),))

print("Original prediction...")
with torch.no_grad():
    for index in range(len(numpyTestData)):
        testTensor = torch.from_numpy(
            np.reshape(numpyTestData[index], (1, 3, 32, 32))
        ).to(device)
        logits = standard_model(testTensor).detach().cpu().numpy()
        predictions_base[index] = np.argmax(logits)
print("Done")

# Project the adversarial images onto the principal components.
# NOTE(review): these images come from the data loader, while `pca` was fit on
# the dataset's stored `.data` array; if the loader rescales or normalises its
# inputs the two live on different value scales — confirm in utils.dataloaders.
transformedTestData = pca.transform(reshapedNumpyTestData)

# Decides how many of the least significant coefficients (of components) to perturb
num_components = 1000

# Column of the first perturbed coefficient. Computed explicitly (instead of
# slicing with -num_components) so that num_components = 0 perturbs nothing.
first_noisy_component = 32 * 32 * 3 - num_components

# Multiplier applied to the standard-normal noise
noise_scale = 10

# How many trials to run
num_trials = 25

# Fixed-seed generator so a fresh "Restart & Run All" reproduces the count
noise_rng = np.random.default_rng(0)

# Per-sample flag: did the prediction change in at least one trial?
result = np.zeros(len(numpyTestData), dtype=int)

# Actual attempts
for trial in range(num_trials):
    print("Trial {}".format(trial))
    # One noise vector per trial, shared by every sample
    random_noise = noise_rng.standard_normal(size=num_components)

    # Work on a copy so every trial starts from the same projection
    transformedTestDataNoisy = np.copy(transformedTestData)

    # Broadcast the noise vector over all rows in one vectorised add
    transformedTestDataNoisy[:, first_noisy_component:] += noise_scale * random_noise

    # Map the perturbed coefficients back to pixel space
    inverseTestDataNoisy = pca.inverse_transform(transformedTestDataNoisy)

    # Reshape into images
    testDataNoisy = np.reshape(inverseTestDataNoisy, (len(numpyTestData), 3, 32, 32))

    # Predictions on the perturbed images
    predictions_modified = np.zeros((len(numpyTestData),))

    with torch.no_grad():
        for index in range(len(testDataNoisy)):
            testTensor = torch.from_numpy(
                np.reshape(testDataNoisy[index], (1, 3, 32, 32))
            ).to(device)
            logits = standard_model(testTensor).detach().cpu().numpy()
            predictions_modified[index] = np.argmax(logits)

    # Accumulate samples whose label flipped in this trial
    check = np.not_equal(predictions_modified, predictions_base)
    result = np.logical_or(check, result)

# Number of samples whose prediction changed in at least one trial.
# NOTE(review): this is a raw count over len(numpyTestData) samples; compare
# it with other runs as a fraction, not as an absolute number.
print(np.sum(result))

Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

Original prediction...
Done
Trial 0
Trial 1
Trial 2
Trial 3
Trial 4
Trial 5
Trial 6
Trial 7
Trial 8
Trial 9
Trial 10
Trial 11
Trial 12
Trial 13
Trial 14
Trial 15
Trial 16
Trial 17
Trial 18
Trial 19
Trial 20
Trial 21
Trial 22
Trial 23
Trial 24
9190
