# MNIST: Training and Testing on a Clean Dataset & Adversarial Detection

## Imports and MNIST loading

In [16]:
# Imports all the module paths
import sys

import numpy as np
import torch
from torch.autograd import Variable
from tqdm.notebook import tnrange, tqdm

sys.path.append("../../")

# Loads the rest of the modules

# File containing all the required training methods
import defences.mnist as defences

# For testing
import utils.clean_test as clean_test

# Contains the data loaders
import utils.dataloaders as dataloaders

# For printing outcomes
# import utils.printing as printing

# Example printing, but I removed it to simplify results
# for epsilon in epsilons:
#     printing.print_attack(
#         model,
#         testSetLoader,
#         "FGSM",
#         attacks["FGSM"],
#         epsilon=epsilon,
#     )

In [12]:
# Select the device PyTorch will run on: CUDA when a GPU is visible,
# otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Notebook will use PyTorch Device: " + device.upper())

Notebook will use PyTorch Device: CUDA


## Load the dataset

In [2]:
# Root folder handed to the project's MNIST loader helper
DATA_ROOT = "../../datasets/"

# Request the loaders with no validation split; the helper returns three
# loaders and the middle (validation) one is discarded.
mnist_loaders = dataloaders.get_MNIST_data_loaders(
    DATA_ROOT, trainSetSize=50000, validationSetSize=0, batchSize=128
)
trainSetLoader, _, testSetLoader = mnist_loaders

## Standard Training

In [3]:
# Folder where trained MNIST models are stored
SAVE_LOAD_ROOT = "../../models_data/MNIST"

# Checkpoint location for the standard (non-robust) model
standard_model_path = SAVE_LOAD_ROOT + "/mnist_standard_with_feature_list"

# Train with the standard procedure, or reuse the saved model when the helper
# finds one at `standard_model_path`.
model = defences.standard_training(
    trainSetLoader, load_if_available=True, load_path=standard_model_path
)

Found already trained model...
... loaded!


In [4]:
# Evaluate the model on the test set loader (the helper prints the accuracy)
clean_test.test_trained_model(model, testSetLoader)

Testing the model...


Testing Progress:   0%|          | 0/79 [00:00<?, ?it/s]

... done! Accuracy: 99.23%


In [5]:
# Save the whole model object to the same path the training cell loads from.
# NOTE(review): when the model was just loaded from this path, this rewrites
# the same file — confirm that is intended.
torch.save(model, SAVE_LOAD_ROOT + "/mnist_standard_with_feature_list")

## Attacks and Their Results

In [6]:
# A possible attacks array (for nice printing):
# Some attacks use a helper library
import torchattacks

import attacks.fgsm as fgsm
import attacks.ifgsm as ifgsm
import attacks.pgd as pgd
import utils.attacking as attacking

# Display name -> attack function, in the order the attacks are reported
attacks = {
    "FGSM": fgsm.fgsm_attack,
    "I-FGSM": ifgsm.ifgsm_attack,
    "PGD": pgd.pgd_attack,
}

In [7]:
# Noise size setting; presumably meant for the FGSM / noisy samples used
# during feature extraction.
# NOTE(review): this cell only defines the value and nothing in the visible
# notebook reads it — confirm it is still needed.
random_noise_size = 0.25

## Mahalanobis Distance

In [None]:
def generate_mahalanobis_score(
    model,
    trainSetLoader,
    num_classes,
    outf,
    net_type,
    sample_mean,
    precision,
    layer_index,
    magnitude,
):
    """
    Compute the Mahalanobis confidence score of every sample in a data loader.

    For each batch the inputs are first nudged by `magnitude` in the direction
    that increases the score of the closest class Gaussian (input
    pre-processing), then the score of the perturbed input is recorded.

    Args:
        model: network exposing `intermediate_forward(inputs, layer_index)`.
        trainSetLoader: iterable yielding `(images, labels)` batches; the
            labels are not used.
        num_classes: number of class-conditional Gaussians to evaluate.
        outf: unused; kept so existing callers keep working.
        net_type: "densenet" or "resnet" rescales the perturbation of input
            channels 0-2 by fixed per-channel constants; any other value
            leaves it untouched.
        sample_mean: per-layer class means; `sample_mean[layer_index][c]` is
            subtracted from the pooled features (assumed to be a
            `(num_classes, features)` tensor on `device` — TODO confirm).
        precision: per-layer precision matrices; `precision[layer_index]` is
            used as the quadratic-form matrix (assumed `(features, features)`).
        layer_index: which intermediate layer to score.
        magnitude: step size of the input perturbation.

    Returns:
        list with one score per processed sample (max over classes of
        `-0.5 * (f - mu_c)^T P (f - mu_c)`), in loader order.
    """
    # Per-channel divisors applied to the perturbation for the normalised
    # three-channel networks (values taken over unchanged from the original).
    channel_scale = {
        "densenet": (63.0 / 255.0, 62.1 / 255.0, 66.7 / 255.0),
        "resnet": (0.2023, 0.1994, 0.2010),
    }

    layer_mean = sample_mean[layer_index]
    layer_precision = precision[layer_index]

    def _pooled_features(inputs):
        # Flatten everything after the channel axis and average it away,
        # leaving one value per channel.
        features = model.intermediate_forward(inputs, layer_index)
        features = features.view(features.size(0), features.size(1), -1)
        return torch.mean(features, 2)

    def _quadratic_score(centered):
        # Row-wise -0.5 * x^T P x. Equivalent to the diagonal of
        # (x P) x^T without materialising the full batch-by-batch matrix.
        return -0.5 * torch.sum(torch.mm(centered, layer_precision) * centered, dim=1)

    def _class_scores(features):
        # One column per class: shape (batch, num_classes).
        return torch.stack(
            [
                _quadratic_score(features - layer_mean[class_index])
                for class_index in range(num_classes)
            ],
            dim=1,
        )

    mahalanobis = []
    for images, _ in tqdm(trainSetLoader, desc="Batches"):
        # Fresh leaf tensor so the gradient w.r.t. the input can be requested
        images = images.to(device).detach().requires_grad_(True)

        features = _pooled_features(images)

        # Closest class Gaussian for each sample (no gradient needed here)
        with torch.no_grad():
            closest_class = _class_scores(features.detach()).max(1)[1]

        # Input pre-processing: loss is the negated score of the closest class
        closest_mean = layer_mean.index_select(0, closest_class)
        loss = torch.mean(-_quadratic_score(features - closest_mean))

        # Only the input gradient is needed; this avoids accumulating
        # gradients into the model parameters as loss.backward() would.
        (input_grad,) = torch.autograd.grad(loss, images)

        # Sign of the gradient as +1 / -1 (zero counts as +1)
        step = (torch.ge(input_grad, 0).float() - 0.5) * 2
        for channel, scale in enumerate(channel_scale.get(net_type, ())):
            step[:, channel] /= scale

        # images - magnitude * step
        perturbed = torch.add(images.detach(), step, alpha=-magnitude)

        with torch.no_grad():
            noisy_scores = _class_scores(_pooled_features(perturbed))

        best_score, _ = torch.max(noisy_scores, dim=1)
        mahalanobis.extend(best_score.cpu().numpy())

    return mahalanobis

In [17]:
# Extract the Mahalanobis distance features

# Step 1: set the extraction parameters
# Step 1: build a dummy batch (2 random single-channel 28x28 inputs) to probe
# the network with.
fake_input = Variable(torch.rand(2, 1, 28, 28).to(device))

# Step 2: run it through the model to obtain the per-layer outputs
_, layer_list = model.feature_list(fake_input)
num_output = len(layer_list)

# Step 3: record the size of dimension 1 of every layer output
feature_list = np.empty(num_output)
for count, layer in enumerate(layer_list):
    feature_list[count] = layer.size(1)

# Keep `count` equal to the number of layers visited, as a running tally would
count = num_output

In [None]:
# Perturbation magnitudes to sweep over when computing the Mahalanobis scores;
# each value is passed as the `magnitude` argument of the scoring call.
magnitude_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]

In [None]:
# `os` is needed for the output path and is not imported elsewhere in the notebook
import os


def _stack_layer_scores(data, magnitude):
    """Score `data` at every layer and return the scores as columns.

    Returns a float32 array with one row per sample and the per-layer score
    columns concatenated along axis 1.
    """
    per_layer = []
    for layer_index in range(num_output):
        scores = lib_generation.get_Mahalanobis_score_adv(
            model,
            data,
            test_label,
            args.num_classes,
            args.outf,
            args.net_type,
            sample_mean,
            precision,
            layer_index,
            magnitude,
        )
        scores = np.asarray(scores, dtype=np.float32)
        per_layer.append(scores.reshape((scores.shape[0], -1)))
    return np.concatenate(per_layer, axis=1)


# NOTE(review): `lib_generation`, `args`, `test_clean_data`, `test_adv_data`,
# `test_noisy_data`, `test_label`, `sample_mean` and `precision` are not
# defined in the visible notebook cells — they must be provided before this
# cell can run.
for magnitude in magnitude_list:
    print("\nNoise: " + str(magnitude))

    # Scores for clean, adversarial and noisy inputs (same labels for all)
    Mahalanobis_in = _stack_layer_scores(test_clean_data, magnitude)
    Mahalanobis_out = _stack_layer_scores(test_adv_data, magnitude)
    Mahalanobis_noisy = _stack_layer_scores(test_noisy_data, magnitude)

    # Clean and noisy samples together form one group, adversarial the other
    Mahalanobis_pos = np.concatenate((Mahalanobis_in, Mahalanobis_noisy))

    Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
        Mahalanobis_out, Mahalanobis_pos
    )
    file_name = os.path.join(
        args.outf,
        "Mahalanobis_%s_%s_%s.npy" % (str(magnitude), args.dataset, args.adv_type),
    )

    # Append the labels as extra column(s) and save one file per magnitude
    Mahalanobis_data = np.concatenate((Mahalanobis_data, Mahalanobis_labels), axis=1)
    np.save(file_name, Mahalanobis_data)