# Adversarial Attacks

## Install and import dependencies

In [None]:
# If you want to run this outside of Colab you will need to install the
# appropriate libraries, e.g, Pytorch, etc.
! pip install adversarial-robustness-toolbox
! pip install IPython
! pillow
! pip install yaspin

! rm -rf repo 
! git clone https://github.com/jalane76/adversarial-attacks-tutorial.git repo

In [None]:
from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import PyTorchClassifier
from art.utils import load_mnist
import IPython
from IPython.display import display
from IPython.display import Image as DisplayImage
import matplotlib
from matplotlib.colors import TwoSlopeNorm
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image as PILImage
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from yaspin import yaspin
from yaspin.spinners import Spinners

## Set up app parameters and helpers

In [None]:
# Seed NumPy and PyTorch with the same value so their random draws start
# from a fixed state.
rand_seed = 978614566
np.random.seed(rand_seed)
torch.manual_seed(rand_seed)

# Size of one image, in pixels.
image_width = 28
image_height = 28
# Channels-first shape of a single sample. It is derived from the dimensions
# above so the two can never drift apart.
input_shape = (1, image_height, image_width)

# Mini-batch size and epoch count used for every call to classifier.fit().
batch_size = 64
num_epochs = 3
# Number of classes (the digits 0-9).
num_labels = 10

def increase_font():
  """Enlarge the notebook output font.

  Displays a JavaScript snippet that walks the rules of the first stylesheet
  in the document and sets the font size of the `body` rule to 30px, stopping
  at the first match.
  """
  from IPython.display import Javascript

  font_script = '''
    for (rule of document.styleSheets[0].cssRules){
      if (rule.selectorText=='body') {
        rule.style.fontSize = '30px'
        break
      }
    }
  '''
  display(Javascript(font_script))

## We'll use the MNIST dataset, so let's load it

In [None]:
with yaspin(spinner=Spinners.aesthetic, text="Loading MNIST data..."):
  # load_mnist() hands back the training split, the test split and the
  # smallest/largest pixel value, in that order.
  train_split, test_split, min_pixel_value, max_pixel_value = load_mnist()
  x_train, y_train = train_split
  x_test, y_test = test_split

  # Valid pixel range, later handed to the classifier.
  clip_values = (min_pixel_value, max_pixel_value)

  # Move the last axis to position 1 (PyTorch's NCHW layout) and store the
  # images as 32-bit floats.
  nchw_axes = (0, 3, 1, 2)
  x_train = np.transpose(x_train, nchw_axes).astype(np.float32)
  x_test = np.transpose(x_test, nchw_axes).astype(np.float32)

  # Flip the grayscale so the characters are black on a white background.
  x_train, x_test = max_pixel_value - x_train, max_pixel_value - x_test

# Report the shapes of everything that was loaded.
print(f"  Training data shape: {x_train.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"      Test data shape: {x_test.shape}")
print(f"    Test labels shape: {y_test.shape}")


## Let's see the first few benign samples

In [None]:
with yaspin(spinner=Spinners.aesthetic, text="Plotting benign samples..."):
  num_samples = 5
  num_rows = 1

  # One panel per sample, 4 inches per panel in each direction, shared axes.
  fig, axes = plt.subplots(num_rows, num_samples, sharex=True, sharey=True, squeeze=False)
  fig.set_figwidth(4.0 * num_samples)
  fig.set_figheight(4.0 * num_rows)

  # Draw channel 0 of the first `num_samples` training images, left to right.
  for sample_idx, sample_axis in enumerate(axes[0]):
    sample = x_train[sample_idx, 0]
    sample_axis.imshow(sample, cmap="gray", aspect="equal", interpolation="nearest")

## Define the neural network and create an ART classifier

In [None]:
class Net(nn.Module):
  """Small convolutional classifier that emits 10 raw scores per image.

  Two 5x5 convolutions (1 -> 4 -> 10 channels), each followed by ReLU and a
  2x2 max-pool, feed a 160 -> 100 -> 10 fully connected head. For a 1x28x28
  input the feature map entering the head is 10x4x4, i.e. 160 values.
  """

  def __init__(self):
    super().__init__()
    self.conv_1 = nn.Conv2d(1, 4, kernel_size=5, stride=1)
    self.conv_2 = nn.Conv2d(4, 10, kernel_size=5, stride=1)
    self.fc_1 = nn.Linear(4 * 4 * 10, 100)
    self.fc_2 = nn.Linear(100, 10)

  def forward(self, x):
    """Return the unnormalised class scores for the batch `x`."""
    # Convolution -> ReLU -> 2x2 max-pool, twice.
    features = F.max_pool2d(F.relu(self.conv_1(x)), kernel_size=2, stride=2)
    features = F.max_pool2d(F.relu(self.conv_2(features)), kernel_size=2, stride=2)

    # Flatten each feature map to the width expected by the first dense layer.
    flat = features.view(-1, self.fc_1.in_features)

    hidden = F.relu(self.fc_1(flat))
    # No softmax here: the final layer's output is returned as-is.
    return self.fc_2(hidden)

# The network plus the loss and optimizer it is trained with.
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Wrap everything in an ART estimator so that ART attacks can be run on it.
classifier_options = {
  "model": model,
  "clip_values": clip_values,
  "loss": criterion,
  "optimizer": optimizer,
  "input_shape": input_shape,
  "nb_classes": num_labels,
}
classifier = PyTorchClassifier(**classifier_options)

## Train the classifier

In [None]:
# Same batch size and epoch count as configured in the parameters cell.
fit_options = {"batch_size": batch_size, "nb_epochs": num_epochs}
with yaspin(spinner=Spinners.aesthetic, text="Training..."):
  classifier.fit(x_train, y_train, **fit_options)

## Test the accuracy on benign samples

In [None]:
# Score the classifier on the untouched test images. argmax over axis 1 turns
# both the per-class scores and the label rows into class indices.
predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
increase_font()
print(f"Benign accuracy: {accuracy * 100:.2f}%")

# Read the GIF inside a `with` block so the file handle is closed right away.
with open('repo/airplane-thumbs-up.gif', 'rb') as gif_file:
  display(DisplayImage(gif_file.read()))

#    

## Now generate FGSM attacks

In [None]:
attack_spinner = yaspin(spinner=Spinners.aesthetic, text="Generating attacks...")
with attack_spinner:
  # Fast Gradient Method against the trained classifier; eps=0.2 controls the
  # size of the perturbation (see the ART documentation for exact semantics).
  attack = FastGradientMethod(eps=0.2, estimator=classifier)
  # Adversarial counterpart of every test image.
  x_test_adv = attack.generate(x=x_test)

## Now let's compare the first few benign samples with their evil twins

In [None]:
with yaspin(spinner=Spinners.aesthetic, text="Plotting benign and adversarial samples..."):
  num_samples = 5
  num_rows = 2

  # Top row: benign test images. Bottom row: their adversarial counterparts.
  fig, axes = plt.subplots(num_rows, num_samples, sharex=True, sharey=True, squeeze=False)
  fig.set_figheight(4.0 * num_rows)
  fig.set_figwidth(4.0 * num_samples)
  for sample_idx in range(num_samples):
    sample_axis = axes[0, sample_idx]
    sample = x_test[sample_idx, 0, :, :]
    sample_axis.imshow(
      sample, cmap="gray", aspect="equal", interpolation="nearest"
    )

    # Same index into x_test_adv, so each column shows a matching pair.
    evil_twin_axis = axes[1, sample_idx]
    evil_twin = x_test_adv[sample_idx, 0, :, :]
    evil_twin_axis.imshow(
      evil_twin, cmap="gray", aspect="equal", interpolation="nearest"
    )

## Test the accuracy on the adversarial examples

In [None]:
# Score the classifier on the adversarial test images against the original
# labels. argmax over axis 1 turns scores and label rows into class indices.
predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
increase_font()
print(f"Adversarial accuracy: {accuracy * 100:.2f}%")

# Read the GIF inside a `with` block so the file handle is closed right away.
with open('repo/airplane-sweat.gif', 'rb') as gif_file:
  display(DisplayImage(gif_file.read()))

## Let's augment the training data with adversarial examples and retrain

In [None]:
retrain_options = {"batch_size": batch_size, "nb_epochs": num_epochs}
with yaspin(spinner=Spinners.aesthetic, text="Generating training attacks and retraining..."):
  # Craft adversarial versions of the training images with the existing attack.
  x_train_adv = attack.generate(x=x_train)
  # Continue training the same classifier on them, keeping the original labels.
  # NOTE(review): only x_train_adv is passed to fit(); the benign x_train is
  # not concatenated in this call - confirm that is the intended "augmentation".
  classifier.fit(x_train_adv, y_train, **retrain_options)

## Retest the accuracy after retraining

In [None]:
# Score the retrained classifier on the adversarial test images.
# NOTE(review): x_test_adv is reused as-is; presumably it was crafted before
# the retraining step, so it does not target the retrained weights - confirm.
predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
increase_font()
print(f"Retrained accuracy: {accuracy * 100:.2f}%")

# Read the GIF inside a `with` block so the file handle is closed right away.
with open('repo/airplane-happy.gif', 'rb') as gif_file:
  display(DisplayImage(gif_file.read()))

# Runway Numbers

## Let's now generate a set of runway numbers

In [None]:
# First digit
# Keep only samples whose true label is 0-3: digits greater than 3 are not
# valid first digits of a runway number.
first_digit_indices = (np.argmax(y_test, axis=1) <= 3)
first_benign_digits = x_test[first_digit_indices]
first_evil_digits = x_test_adv[first_digit_indices]
first_labels = y_test[first_digit_indices]

# Round the number of samples down to the nearest thousand
new_size = (first_benign_digits.shape[0] // 1000) * 1000
first_benign_digits = first_benign_digits[:new_size]
first_evil_digits = first_evil_digits[:new_size]
first_labels = first_labels[:new_size]

# Make a random boolean array (half True, half False) and use it to select
# between benign and evil digits. The trailing singleton axes let the mask
# broadcast over each whole image.
select_indices = np.full((new_size, 1, 1, 1), False)
select_indices[:new_size // 2] = True
np.random.shuffle(select_indices)
first_digits = np.where(select_indices, first_benign_digits, first_evil_digits)

# Second digit
# Any digit is allowed here, so select between benign and evil versions over
# the whole test set.
select_indices = np.full(x_test.shape[0], False)
select_indices[:x_test.shape[0] // 2] = True
np.random.shuffle(select_indices)
second_digits = np.where(select_indices.reshape((x_test.shape[0], 1, 1, 1)), x_test, x_test_adv)
second_labels = y_test

# Trim second digits set down to the same size as the first digit set
second_digits = second_digits[:new_size]
second_labels = second_labels[:new_size]

# Shuffle each set of digits independently so the pairing between first and
# second digits is random. Images and labels share one permutation so they
# stay aligned.
shuffle_indices = np.arange(new_size)
np.random.shuffle(shuffle_indices)
first_digits = first_digits[shuffle_indices]
first_labels = first_labels[shuffle_indices]

# Shuffle again (in place) to get a different permutation for the second digit
np.random.shuffle(shuffle_indices)
second_digits = second_digits[shuffle_indices]
second_labels = second_labels[shuffle_indices]

# Stitch the first and second digits into the same tensor: axis 1 of `digits`
# and of `labels` indexes the position (0 = first digit, 1 = second digit).
digits = np.concatenate((first_digits, second_digits), axis=1)
labels = np.stack((first_labels, second_labels), axis=1)

## Next we'll import some images of perfect runway number digits

In [None]:
# One grayscale icon per digit class, indexed by the digit value.
# NOTE(review): each repo/<digit>.png is assumed to already have the same
# pixel dimensions as one slot of `digit_icons` - confirm against the assets.
digit_icons = np.full((num_labels, image_width, image_height), 0.0)
for i in range(num_labels):
  # Close the image file as soon as its pixels have been copied out.
  with PILImage.open(f'repo/{i}.png') as icon_file:
    image = np.asarray(icon_file.convert('L'))
  # Write only slot i. A slice like [i::] would also overwrite every later slot.
  digit_icons[i] = image

with yaspin(spinner=Spinners.aesthetic, text="Plotting digit icons..."):
  num_samples = 10
  num_rows = 1

  # One panel per digit icon, left to right.
  fig, axes = plt.subplots(num_rows, num_samples, sharex=True, sharey=True, squeeze=False)
  fig.set_figheight(4.0 * num_rows)
  fig.set_figwidth(4.0 * num_samples)
  for sample_idx in range(num_samples):
    sample_axis = axes[0, sample_idx]
    sample = digit_icons[sample_idx, :, :]
    sample_axis.imshow(
      sample, cmap="gray", aspect="equal", interpolation="nearest"
    )

## Run the classifier on the "runway numbers"

In [None]:
with yaspin(spinner=Spinners.aesthetic, text="Classifying runway numbers..."):
  # Unroll the two digit positions into one batch of single-channel images.
  flat_digits = digits.reshape(new_size * 2, 1, image_width, image_height)
  flat_predictions = classifier.predict(flat_digits)
  # Fold the scores back so axis 1 is again the digit position.
  predictions = flat_predictions.reshape(new_size, 2, num_labels)

## Run a "monitor" on the predictions

In [None]:
# The monitor passes (True) when the predicted first digit lies in 0..3 and
# flags (False) anything else. The lower bound always holds for an argmax
# result; it is kept to spell out the accepted range.
first_digit_predictions = np.argmax(predictions[:, 0, :], axis=1)
first_digit_monitor = (first_digit_predictions >= 0) & (first_digit_predictions <= 3)

## Let's see how we did

In [None]:
# Per-position accuracy: compare predicted and true class indices for the
# first (index 0) and second (index 1) digit of every runway number.
first_digit_successes = np.sum(np.argmax(predictions[:, 0, :], axis=1) == np.argmax(labels[:, 0, :], axis=1))
first_digit_errors = predictions.shape[0] - first_digit_successes
first_digit_accuracy = first_digit_successes / predictions.shape[0]
second_digit_accuracy = np.sum(np.argmax(predictions[:, 1, :], axis=1) == np.argmax(labels[:, 1, :], axis=1)) / predictions.shape[0]
# A "catch" is any sample the monitor flagged, i.e. where its entry is False.
monitor_catches = np.sum(np.logical_not(first_digit_monitor))

increase_font()
print(f"First digit errors: {first_digit_errors}")
print(f"First digit accuracy: {first_digit_accuracy * 100:.2f}%")
print(f"Second digit accuracy: {second_digit_accuracy * 100:.2f}%")
print(f"Monitor catches: {monitor_catches}")
# Share of first-digit errors that the monitor flagged. Skip the ratio when
# there were no errors, to avoid dividing by zero.
if first_digit_errors > 0:
  print(f"Monitor savings: {monitor_catches / first_digit_errors * 100:.2f}%")
else:
  print("Monitor savings: n/a (no first digit errors)")

## Let's find examples of
*   A correctly predicted runway number
*   An incorrectly predicted runway number where the 2nd number is incorrect
*   An incorrectly predicted runway number where the 1st number is incorrect, but the monitor **was not** triggered
*   An incorrectly predicted runway number where the 1st number is incorrect, and the monitor **was** triggered



In [None]:
# Boolean masks: True where the predicted class matches the true class.
correct_first_digits = np.argmax(predictions[:, 0, :], axis=1) == np.argmax(labels[:, 0, :], axis=1)
correct_second_digits = np.argmax(predictions[:, 1, :], axis=1) == np.argmax(labels[:, 1, :], axis=1)

# Take the first sample matching each scenario. `[0][0]` raises IndexError if
# a scenario has no matching sample at all.
# Both digits right.
correct_idx = np.argwhere(correct_first_digits & correct_second_digits)[0][0]
# First digit right, second digit wrong.
incorrect_2_idx = np.argwhere(correct_first_digits & np.logical_not(correct_second_digits))[0][0]
# First digit wrong, second digit right, and the monitor let it through
# (first_digit_monitor is True).
incorrect_1_idx = np.argwhere(np.logical_not(correct_first_digits) & correct_second_digits & first_digit_monitor)[0][0]
# The monitor flagged the first digit (first_digit_monitor is False).
incorrect_monitor_idx = np.argwhere(np.logical_not(first_digit_monitor))[0][0]

# (plot row, sample index) pairs, one per scenario.
example_indices = [(0, correct_idx), (1, incorrect_2_idx), (2, incorrect_1_idx), (3, incorrect_monitor_idx)]

## Finally, let's plot some examples

In [None]:
with yaspin(spinner=Spinners.aesthetic, text="Plotting runway numbers outcomes..."):
  num_rows = 4
  num_columns = 6

  col_titles = [
    'Input 1',
    'Input 2',
    'Prediction 1',
    'Prediction 2',
    'Ground Truth 1',
    'Ground Truth 2'
  ]

  # Colormaps: grey for inputs, green/red for right/wrong predictions, blue
  # for the ground truth.
  input_color = "Greys_r"
  success_color = "Greens_r"
  failure_color = "Reds_r"
  truth_color = "Blues_r"

  fig, axes = plt.subplots(num_rows, num_columns, sharex=True, sharey=True, squeeze=False)
  fig.set_figheight(4.0 * num_rows)
  # 4 inches per panel, so the width scales with the number of columns.
  fig.set_figwidth(4.0 * num_columns)

  # Column titles go on the top row only.
  for title_axis, title in zip(axes[0], col_titles):
    title_axis.set_title(title)

  for row_idx, example_idx in example_indices:
    input1 = digits[example_idx, 0, :, :]
    input2 = digits[example_idx, 1, :, :]

    # Predicted and true class index for each digit position.
    predicted_1 = np.argmax(predictions[example_idx, 0, :])
    predicted_2 = np.argmax(predictions[example_idx, 1, :])
    true_1 = np.argmax(labels[example_idx, 0, :])
    true_2 = np.argmax(labels[example_idx, 1, :])

    # Show the clean icon of each class rather than the raw sample.
    prediction1 = digit_icons[predicted_1]
    prediction2 = digit_icons[predicted_2]
    truth1 = digit_icons[true_1]
    truth2 = digit_icons[true_2]

    # The first digit counts as a success only if the monitor passed it AND
    # the class is right. The second digit only needs the right class.
    first_ok = first_digit_monitor[example_idx] and predicted_1 == true_1
    second_ok = predicted_2 == true_2

    # (image, colormap) for each of the six columns, left to right.
    panels = [
      (input1, input_color),
      (input2, input_color),
      (prediction1, success_color if first_ok else failure_color),
      (prediction2, success_color if second_ok else failure_color),
      (truth1, truth_color),
      (truth2, truth_color),
    ]
    for col_idx, (panel, panel_color) in enumerate(panels):
      axes[row_idx, col_idx].imshow(
        panel, cmap=panel_color, aspect="equal", interpolation="nearest"
      )