**This notebook focuses on the effectiveness of PixelDefend against adversarial attacks on the MNIST and MARVEL datasets.**

## **Section 0 - Setting Up**

### **Load prerequisites**

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout, Layer

from keras_radam import RAdam

import cv2
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from art import config
from art.attacks.evasion import FastGradientMethod, DeepFool, ProjectedGradientDescent, SaliencyMapMethod, CarliniL2Method, NewtonFool, BasicIterativeMethod
from art.defences.preprocessor import PixelDefend
from art.defences.trainer import AdversarialTrainer
from art.estimators.classification import KerasClassifier, TensorFlowV2Classifier, PyTorchClassifier
from art.utils import load_mnist

### Load PixelCNN

In [28]:
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import unittest

import numpy as np
import torch.nn as nn
import torch.optim as optim

from art.estimators.classification.pytorch import PyTorchClassifier
from art.defences.preprocessor import PixelDefend
from art.utils import load_mnist

from tests.utils import master_seed


class ModelImage(nn.Module):
    """Single-layer stand-in for a PixelCNN.

    Every sample is flattened to 25 values and passed through one linear
    layer producing 6400 logits, returned reshaped to ``(-1, 5, 5, 1, 256)``.
    """

    def __init__(self):
        super().__init__()
        # 6400 = 5 * 5 * 1 * 256, the per-sample element count of forward()'s output.
        self.fc = nn.Linear(25, 6400)

    def forward(self, x):
        """Return the logits for ``x``, which must be viewable as ``(-1, 25)``."""
        flattened = x.view(-1, 25)
        return self.fc(flattened).view(-1, 5, 5, 1, 256)

In [29]:
# Define the network
# A ModelImage instance is wrapped as an ART PyTorchClassifier and handed to
# PixelDefend as its `pixel_cnn`.
model = ModelImage()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
# NOTE(review): ModelImage flattens each sample to 25 values (a 5x5 patch), while
# input_shape below declares (1, 28, 28) -- confirm which of the two is intended.
pixelcnn = PyTorchClassifier(
    model=model, loss=loss_fn, optimizer=optimizer, input_shape=(1, 28, 28), nb_classes=10, clip_values=(0, 1)
)
# eps=5: presumably the per-pixel search radius on the 0-255 scale -- TODO confirm
# against the ART PixelDefend documentation.
defence = PixelDefend(eps=5, pixel_cnn=pixelcnn)

In [30]:
# Only the MNIST training images are kept; labels, test split and pixel bounds are discarded.
(x_train, _), (_, _), _, _ = load_mnist()
# Keep the first two samples and a 5x5 window (indices 10-14 and 15-19 on the second
# and third axes). Assumes a trailing channel axis of size 1, so that each sample
# has the 25 values ModelImage expects -- TODO confirm.
x_train = x_train[:2, 10:15, 15:20, :]
x_train = x_train.astype(np.float32)

In [31]:
# Smoke-test the defence on the cropped samples.
# NOTE(review): later cells take element [0] of the value returned by defence(...),
# so x_defended presumably holds the whole returned tuple, not just the image array.
x_defended = defence(x_train)

### **Modification: Disabling eager execution to enable adversarial crafting**

In [None]:
# Switch TensorFlow to graph mode; per the heading above, this is needed so the
# attacks can craft adversarial examples against the Keras model.
# NOTE(review): presumably this has to run before any Keras model is loaded -- confirm.
tf.compat.v1.disable_eager_execution()

### **Load MARVEL dataset**

In [None]:
# Module-level accumulators filled by load_training_data / load_test_data below.
# Both loaders append to the *_cln names, so the training lists have to exist
# under those names as well; x_train / y_train are kept for backward compatibility.
x_train = []
y_train = []
x_train_cln = []
y_train_cln = []
x_test_cln = []
y_test_cln = []
# Pixel range of the loaded images (the loaders divide every image by 255).
min_pixel_value = 0
max_pixel_value = 1

def marvel_class(filename):
    """Return the 9-element one-hot label for a MARVEL class name.

    Args:
        filename: Class (directory) name, e.g. ``'Tug'``.

    Returns:
        A new list of nine ints with a single 1 at the class position,
        or ``None`` when the name is not one of the nine known classes.
    """
    # Position in this tuple is the position of the 1 in the one-hot vector.
    class_names = (
        'HeavyLoadCarrier',
        'CombatVessel',
        'ContainerShip',
        'PassengersShip',
        'Ro-roCargo',
        'Tanker',
        'Tug',
        'SupplyVessel',
        'Yacht',
    )
    position_by_name = {name: position for position, name in enumerate(class_names)}
    hot = position_by_name.get(filename)
    if hot is None:
        return None
    return [1 if slot == hot else 0 for slot in range(len(class_names))]

def load_training_data(filename):
    """Append up to 100 training images of one MARVEL class to the global lists.

    Every readable image in the class directory is resized to 320x240, divided
    by 255 and appended to ``x_train_cln``; its one-hot label from
    ``marvel_class(filename)`` is appended to ``y_train_cln``. Entries for which
    ``cv2.imread`` returns ``None`` are skipped and do not count towards the limit.

    Args:
        filename: Name of the class directory under ``train_9``.

    Returns:
        The global ``(x_train_cln, y_train_cln)`` lists.
    """
    url = "/home/cyber/Desktop/Adrian/marvel_data/train_9/"+filename
    # The counter has to exist before the loop: it is read on every iteration,
    # including the first one, and without it the loop raises UnboundLocalError.
    i = 0
    for imgname in os.listdir(url):
        img = cv2.imread(os.path.join(url,imgname))
        if img is None:
            continue
        img = cv2.resize(img, (320,240))
        x_train_cln.append(img/255)
        y_train_cln.append(marvel_class(filename))
        i = i + 1
        # Cap the number of images taken from each class.
        if i == 100:
            break
    return x_train_cln, y_train_cln

def load_test_data(filename):
    """Append up to 100 test images of one MARVEL class to the global lists.

    Each readable image in the class directory is resized to 320x240, divided
    by 255 and stored in ``x_test_cln``; the matching one-hot label from
    ``marvel_class(filename)`` goes to ``y_test_cln``. Entries for which
    ``cv2.imread`` returns ``None`` are skipped.

    Returns:
        The global ``(x_test_cln, y_test_cln)`` lists.
    """
    class_dir = "/home/cyber/Desktop/Adrian/marvel_data/test_9/"+filename
    loaded = 0
    for entry in os.listdir(class_dir):
        picture = cv2.imread(os.path.join(class_dir, entry))
        if picture is None:
            continue
        resized = cv2.resize(picture, (320,240))
        x_test_cln.append(resized/255)
        y_test_cln.append(marvel_class(filename))
        loaded += 1
        # Stop once the per-class cap is reached.
        if loaded == 100:
            break
    return x_test_cln, y_test_cln

# for filename in os.listdir("/home/cyber/Desktop/Adrian/marvel_data/train_9"):
#     load_training_data(filename)
#     print(filename)

# Load up to 100 test images for every entry found under test_9; the entry name
# is used both as the sub-directory to read and as the class name for the label.
# Each name is printed once its images have been loaded, as a progress indicator.
for filename in os.listdir("/home/cyber/Desktop/Adrian/marvel_data/test_9"):
    load_test_data(filename)
    print(filename)
    
#load_training_data("/home/cyber/Desktop/Adrian/marvel_data/test_9/CombatVessel")


*Modification: Convert the MARVEL x_test list into a float32 NumPy array, to enable classification*

In [None]:
# Stack the list of per-image arrays into a single float32 array.
# Only the images are converted; y_test_cln stays a Python list of one-hot lists.
x_test_cln = np.array(x_test_cln, dtype=np.float32)

### **Load MNIST dataset**

In [22]:
# Load MNIST through ART. This rebinds x_test_cln / y_test_cln and the pixel bounds,
# replacing any MARVEL data loaded by the cells above -- run only the loader for the
# dataset under test.
(x_train_cln, y_train_cln), (x_test_cln, y_test_cln), min_pixel_value, max_pixel_value = load_mnist()
# Optional subsampling for quicker runs.
# NOTE(review): the line below slices images to 1000 but labels to 100; use the same
# bound for both if it is re-enabled.
# x_test_cln, y_test_cln = x_test_cln[:1000], y_test_cln[:100]

### **Load / Create classifier model**

*MNIST pre-trained model*

In [None]:
# Load the saved Keras model for MNIST.
# NOTE: this rebinds `model`, which previously referred to the ModelImage instance.
# The MARVEL cell below assigns the same name, so run only the cell that matches
# the loaded dataset.
model = load_model("/home/cyber/mnist_trained_model.h5")

*MARVEL pre-trained model*

In [None]:
# Load the saved MARVEL model. RAdam is passed as a custom object so the saved file
# can be deserialised; compile=False skips re-compiling the model on load.
# NOTE: this rebinds `model`, overriding the MNIST model if that cell was also run.
model_path = "/home/cyber/Desktop/Adrian/Xception-10-0.74.hdf5"
model = load_model(model_path, custom_objects={'RAdam': RAdam}, compile=False)

*Optional step: Train and save a model for future use*

In [None]:
# model.fit(x_train_cln, y_train_cln, batch_size=64, epochs=10, verbose=True)

In [None]:
# model.save("/home/cyber/dataset_trained_model.h5")

*Create ART classifier*

In [None]:
# Wrap the Keras model for ART, clipping inputs to the pixel range of the loaded dataset.
# use_logits=False: presumably the model's last layer already outputs probabilities --
# TODO confirm against the saved model.
classifier = KerasClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value), use_logits=False)

## **Section 1 - Attack**

Step 1: Evaluate the classifier on benign test examples

In [None]:
# Score the untouched test set: a sample counts as correct when the arg-max of the
# prediction equals the arg-max of its one-hot label.
predictions_cln = classifier.predict(x_test_cln)
hits_cln = np.argmax(predictions_cln, axis=1) == np.argmax(y_test_cln, axis=1)
accuracy_cln = hits_cln.sum() / len(y_test_cln)

print("Accuracy on benign test examples: {}%".format(accuracy_cln * 100))

Step 2: Split benign test examples into true and false positives

In [None]:
# Split the benign test set into correctly classified samples ("true positives")
# and misclassified samples ("false positives").
# Class ids are computed once for the whole set instead of once per sample.
predicted_cln = np.argmax(predictions_cln, axis=1)
actual_cln = np.argmax(y_test_cln, axis=1)
correct_cln = predicted_cln == actual_cln

# Index lists are kept as plain Python lists of ints.
tp_cln_indexes = np.flatnonzero(correct_cln).tolist()
fp_cln_indexes = np.flatnonzero(~correct_cln).tolist()

# Boolean-mask selection keeps the per-sample shape even when a subset is empty.
x_test_cln_tp = np.asarray(x_test_cln)[correct_cln]
y_test_cln_tp = np.asarray(y_test_cln)[correct_cln]
x_test_cln_fp = np.asarray(x_test_cln)[~correct_cln]
y_test_cln_fp = np.asarray(y_test_cln)[~correct_cln]

print('Number of benign true positives: {:}'.format(len(x_test_cln_tp)))
print('Number of benign false positives: {:}'.format(len(x_test_cln_fp)))

Step 3: Craft adversarial examples

*Jacobian-based Saliency Map Attack (JSMA)*

In [None]:
# adv_crafter = SaliencyMapMethod(classifier=classifier, theta = 0.1, gamma=0.3, verbose=True)
# x_test_JSMA_MARVEL = adv_crafter.generate(x_test_cln)
# %store x_test_JSMA_MARVEL

*Basic Iterative Method (BIM)*

In [None]:
# adv_crafter = BasicIterativeMethod(classifier, eps=0.1, eps_step=0.01, max_iter=30)
# x_test_BIM_MARVEL = adv_crafter.generate(x_test_cln)
# %store x_test_BIM_MARVEL

*Projected Gradient Descent (PGD)*

In [None]:
# adv_crafter = ProjectedGradientDescent(classifier, eps=0.1, eps_step=0.01, max_iter=30)
# x_test_PGD_MARVEL = adv_crafter.generate(x_test_cln)
# %store x_test_PGD_MARVEL

*NewtonFool*

In [None]:
# adv_crafter =  NewtonFool(classifier=classifier, eta=0.005, max_iter=25, verbose=True)
# x_test_Newton_MARVEL = adv_crafter.generate(x_test_cln)
# %store x_test_Newton_MARVEL

*DeepFool*

In [None]:
# adv_crafter = DeepFool(classifier=classifier, epsilon=1e-06/255, max_iter=50)
# x_test_Deep_MARVEL = adv_crafter.generate(x_test_cln)
# %store x_test_Deep_MARVEL

*Adversarial Examples*

In [None]:
# Restore an adversarial test set persisted earlier with %store and use it as the
# attack data for the rest of the notebook.
# NOTE(review): the stored name refers to MNIST/JSMA; it has to match the dataset and
# model loaded above, because accuracy is measured against y_test_cln.
%store -r x_test_JSMA_MNIST
x_test_adv = x_test_JSMA_MNIST

Step 4: Evaluate the classifier on the adversarial test set

In [None]:
# Score the adversarial set against the labels of the benign samples it was crafted from.
predictions_adv = classifier.predict(x_test_adv)
hits_adv = np.argmax(predictions_adv, axis=1) == np.argmax(y_test_cln, axis=1)
accuracy_adv = hits_adv.sum() / len(y_test_cln)

print("Accuracy on adversarial test examples: {}%".format(accuracy_adv * 100))

Step 5: Split the adversarial test examples into true and false positives

In [None]:
# Split the adversarial test set into samples that are still classified correctly
# ("true positives") and samples that are misclassified ("false positives").
# Class ids are computed once for the whole set instead of once per sample.
predicted_adv = np.argmax(predictions_adv, axis=1)
actual_adv = np.argmax(y_test_cln, axis=1)
correct_adv = predicted_adv == actual_adv

# Index lists are kept as plain Python lists of ints.
tp_adv_indexes = np.flatnonzero(correct_adv).tolist()
fp_adv_indexes = np.flatnonzero(~correct_adv).tolist()

# Labels come from the benign set: adversarial samples keep the label of their source.
# Boolean-mask selection keeps the per-sample shape even when a subset is empty.
x_test_adv_tp = np.asarray(x_test_adv)[correct_adv]
y_test_adv_tp = np.asarray(y_test_cln)[correct_adv]
x_test_adv_fp = np.asarray(x_test_adv)[~correct_adv]
y_test_adv_fp = np.asarray(y_test_cln)[~correct_adv]

print('Adversarial TP: {:}'.format(len(x_test_adv_tp)))
print('Adversarial FP: {:}'.format(len(x_test_adv_fp)))

Optional step: Plot benign samples and their adversarial counterparts

In [None]:
# Plot the first few benign images (top row, titled with the true class) above their
# adversarial counterparts (bottom row, titled with the class predicted for them).
num = 3

# Class ids are computed once rather than once per plotted image.
true_classes = np.argmax(y_test_cln, axis=1)
adv_predicted_classes = np.argmax(predictions_adv, axis=1)

# Only two rows are drawn, so the grid is declared with two rows; a taller grid
# would leave the lower part of the figure empty.
fig, axes = plt.subplots(2, num, figsize=(10, 10), squeeze=False)

for i in range(num):
    rows = (
        (axes[0, i], x_test_cln[i], true_classes[i]),
        (axes[1, i], x_test_adv[i], adv_predicted_classes[i]),
    )
    for ax, image, label in rows:
        ax.imshow(image, cmap='gray')
        ax.set_title('{:}'.format(label))
        ax.axis("off")

plt.tight_layout()
plt.show()

## **Section 2 - Defence**

### **PixelDefend**

Step 1: Transform input

In [None]:
# Purify every evaluation subset with PixelDefend; element [0] of the call result
# is the transformed image array.
# NOTE(review): `pixelcnn` wraps ModelImage, which flattens each sample to 25 values;
# confirm it is a suitable PixelCNN for full-size images before trusting the results.
defence = PixelDefend(eps=5, pixel_cnn=pixelcnn, verbose=True)

# The images are passed in their native [0, 1] range, the same range the PixelCNN
# classifier is clipped to and the same way the defence is called in the set-up cell.
# PixelDefend handles the conversion to and from 8-bit pixel values itself, so scaling
# by 255 around the call would apply that conversion twice.
x_test_cln_tp_pd = defence(x_test_cln_tp)[0]
x_test_cln_fp_pd = defence(x_test_cln_fp)[0]
x_test_adv_tp_pd = defence(x_test_adv_tp)[0]
x_test_adv_fp_pd = defence(x_test_adv_fp)[0]
x_test_cln_pd = defence(x_test_cln)[0]
x_test_adv_pd = defence(x_test_adv)[0]

Step 2: Evaluate the classifier on all six sets of data (full, true-positive and false-positive subsets of the benign and adversarial test sets) after PixelDefend

In [None]:
def _share_correct(scores, one_hot_labels):
    """Return the fraction of rows whose arg-max class in ``scores`` matches ``one_hot_labels``."""
    agree = np.argmax(scores, axis=1) == np.argmax(one_hot_labels, axis=1)
    return np.sum(agree) / len(one_hot_labels)


# --- Benign data -------------------------------------------------------------
# Whole benign test set: change relative to the undefended accuracy.
predictions_cln_pd = classifier.predict(x_test_cln_pd)
accuracy_cln_pd = _share_correct(predictions_cln_pd, y_test_cln)
print("Effect of PixelDefend on entire benign test set: {:.2f}%".format((accuracy_cln_pd - accuracy_cln) * 100))

# Previously correct benign samples: anything below 100% is a loss caused by the defence.
predictions_cln_tp_pd = classifier.predict(x_test_cln_tp_pd)
accuracy_cln_tp_pd = _share_correct(predictions_cln_tp_pd, y_test_cln_tp)
# print("\nAccuracy on true positive benign test examples after PixelDefend: {:.2f}%".format(accuracy_cln_tp_pd * 100))
print("\nAccuracy drop on true positive benign test examples after PixelDefend: {:.2f}%".format((1 - accuracy_cln_tp_pd) * 100))

# Previously misclassified benign samples: anything above 0% is a gain from the defence.
predictions_cln_fp_pd = classifier.predict(x_test_cln_fp_pd)
accuracy_cln_fp_pd = _share_correct(predictions_cln_fp_pd, y_test_cln_fp)
print("\nAccuracy increase on false positive benign test examples after PixelDefend: {:.2f}%".format(accuracy_cln_fp_pd * 100))

# --- Adversarial data --------------------------------------------------------
# Whole adversarial test set: change relative to the undefended accuracy.
predictions_adv_pd = classifier.predict(x_test_adv_pd)
accuracy_adv_pd = _share_correct(predictions_adv_pd, y_test_cln)
print("\nEffect of PixelDefend on entire adversarial test set: {:.2f}%".format((accuracy_adv_pd-accuracy_adv) * 100))

# Adversarial samples that were still classified correctly before the defence.
predictions_adv_tp_pd = classifier.predict(x_test_adv_tp_pd)
accuracy_adv_tp_pd = _share_correct(predictions_adv_tp_pd, y_test_adv_tp)
# print("\nAccuracy on true positive adversarial test examples after PixelDefend: {:.2f}%".format(accuracy_adv_tp_pd * 100))
print("\nAccuracy drop on true positive adversarial test examples after PixelDefend: {:.2f}%".format((1 - accuracy_adv_tp_pd) * 100))

# Adversarial samples that were misclassified before the defence.
predictions_adv_fp_pd = classifier.predict(x_test_adv_fp_pd)
accuracy_adv_fp_pd = _share_correct(predictions_adv_fp_pd, y_test_adv_fp)
print("\nAccuracy increase on false positive adversarial test examples after PixelDefend: {:.2f}%".format(accuracy_adv_fp_pd * 100))

Optional step: Plot all data pre- and post-transformation

In [None]:
# Plot one example from each subset before (left column) and after (right column)
# PixelDefend, titled with the predicted class and, for misclassified samples,
# the true class as well.

# Predictions on the untransformed subsets, needed for the left-column titles.
predictions_cln_tp = classifier.predict(x_test_cln_tp)
predictions_cln_fp = classifier.predict(x_test_cln_fp)
predictions_adv_tp = classifier.predict(x_test_adv_tp)
predictions_adv_fp = classifier.predict(x_test_adv_fp)


def _first_class(scores):
    """Return the arg-max class index of the first row of a score or one-hot matrix."""
    return np.argmax(scores, axis=1)[0]


# (image, title) pairs in subplot order: row by row, left to right.
# Each entry shows the first sample of its subset, so every subset must be non-empty.
panels = [
    # Benign true positives
    (x_test_cln_tp[0],
     'Benign TP: {:}'.format(_first_class(predictions_cln_tp))),
    (x_test_cln_tp_pd[0],
     'Benign TP after PixelDefend: {:}'.format(_first_class(predictions_cln_tp_pd))),
    # Benign false positives
    (x_test_cln_fp[0],
     'Benign FP: {:}\nTrue class: {:}'.format(_first_class(predictions_cln_fp), _first_class(y_test_cln_fp))),
    (x_test_cln_fp_pd[0],
     'Benign FP after PixelDefend: {:}\nTrue class: {:}'.format(_first_class(predictions_cln_fp_pd), _first_class(y_test_cln_fp))),
    # Adversarial true positives
    (x_test_adv_tp[0],
     'Adversarial TP: {:}'.format(_first_class(predictions_adv_tp))),
    (x_test_adv_tp_pd[0],
     'Adversarial TP after PixelDefend: {:}'.format(_first_class(predictions_adv_tp_pd))),
    # Adversarial false positives
    (x_test_adv_fp[0],
     'Adversarial FP: {:}\nTrue class: {:}'.format(_first_class(predictions_adv_fp), _first_class(y_test_adv_fp))),
    (x_test_adv_fp_pd[0],
     'Adversarial FP after PixelDefend: {:}\nTrue class: {:}'.format(_first_class(predictions_adv_fp_pd), _first_class(y_test_adv_fp))),
]

plt.figure(figsize=(10, 10))

# All titles use the default font size so the eight panels are laid out uniformly.
for position, (image, title) in enumerate(panels, start=1):
    ax = plt.subplot(4, 2, position)
    plt.imshow(image, cmap='gray')
    ax.set_title(title)
    plt.axis("off")

plt.tight_layout()
plt.show()

## Others

Optional step: Compare the performance of TotalVarMin against the adversary over a range of eps values

In [None]:
# eps_range = [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# accuracy_original = []
# accuracy_robust = []

# adv_crafter = FastGradientMethod(classifier)
# adv_crafter_robust = FastGradientMethod(robust_classifier)

# for eps in eps_range:
#     adv_crafter.set_params(**{'eps': eps})
#     adv_crafter_robust.set_params(**{'eps': eps})
#     x_test_adv = adv_crafter.generate(x_test[:100])
#     x_test_adv_robust = adv_crafter_robust.generate(x_test[:100])
    
#     predictions_original = np.argmax(classifier.predict(x_test_adv), axis=1)
#     accuracy_original += [np.sum(predictions_original == np.argmax(y_test[:100], axis=1))]
    
#     predictions_robust = np.argmax(robust_classifier.predict(x_test_adv_robust), axis=1)
#     accuracy_robust += [np.sum(predictions_robust == np.argmax(y_test[:100], axis=1))]

# eps_range = eps_range

In [None]:
# fig, ax = plt.subplots()
# ax.plot(np.array(eps_range), np.array(accuracy_original), 'b--', label='Original classifier')
# ax.plot(np.array(eps_range), np.array(accuracy_robust), 'r--', label='Robust classifier')

# legend = ax.legend(loc='upper right', shadow=True, fontsize='large')
# #legend.get_frame().set_facecolor('#00FFCC')

# plt.xlabel('Attack strength (eps)')
# plt.ylabel('Accuracy (%)')
# plt.show()