# TensorFlow implementation of the MobileNetV2 model: Adversarial attack demo part 1

In [None]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt 

# Load MobileNetV2 with ImageNet weights, keeping its classification head
# (include_top=True), and freeze it: the model is only queried, never trained.
pretrained_model = tf.keras.applications.MobileNetV2(
    include_top=True,
    weights='imagenet',
)
pretrained_model.trainable = False

# Keras helper that turns the model's output into ImageNet labels.
decode_predictions = tf.keras.applications.mobilenet_v2.decode_predictions


Let's also define helper functions to preprocess the image and to extract the label from the probability vector returned by `model.predict()`.


In [None]:
def preprocess(image):
    """Convert a decoded image tensor into a single-image batch for MobileNetV2.

    The image is cast to float32, resized to 224x224, passed through
    ``mobilenet_v2.preprocess_input``, and given a new leading (batch) axis.

    Args:
        image: decoded image tensor.

    Returns:
        The preprocessed image with one extra leading axis.
    """
    as_float = tf.cast(image, tf.float32)
    resized = tf.image.resize(as_float, (224, 224))
    scaled = tf.keras.applications.mobilenet_v2.preprocess_input(resized)
    # Indexing with None prepends the batch axis.
    return scaled[None, ...]

def get_imagenet_label(probs):
    """Return the top-1 decoded prediction for the first item in the batch.

    Args:
        probs: model output passed straight to ``decode_predictions``.

    Returns:
        The first entry of the first per-image list returned by
        ``decode_predictions(probs, top=1)``.
    """
    per_image_predictions = decode_predictions(probs, top=1)
    first_image_predictions = per_image_predictions[0]
    return first_image_predictions[0]

The image we are going to use is that of a panda, since pandas are the poster children of the adversarial attack world. (One of the first papers on the subject demonstrated an adversarial attack on an image of a panda, and most articles written on adversarial attacks since then have used this image.) Let's load the image, preprocess it, and get the predicted class.


In [None]:
# Read the panda photo and decode it into a 3-D RGB tensor.
# channels=3 forces three channels even for grayscale or RGBA files, and
# expand_animations=False stops an animated GIF from decoding to a 4-D tensor;
# either case would otherwise give preprocess() an input the model cannot take.
image_raw = tf.io.read_file("panda.jpeg")
image_decoded = tf.image.decode_image(image_raw, channels=3, expand_animations=False)

# `image` is the preprocessed single-image batch used by the rest of the notebook.
image = preprocess(image_decoded)
image_probs = pretrained_model.predict(image)
_, image_class, class_confidence = get_imagenet_label(image_probs)

# Show the image together with the model's top-1 prediction.
fig, ax = plt.subplots()
ax.imshow(image[0] * 0.5 + 0.5)  # To change [-1, 1] to [0,1]
ax.set_title('{} : {:.2f}% Confidence'.format(image_class, class_confidence*100))
plt.show()

![panda_pred](https://blog.paperspace.com/content/images/2022/11/panda_pred.png)

The image is classified as "giant panda" with 86.27% confidence.

Let's create the perturbations by taking the gradient of the loss with respect to the original image and keeping only its sign. These perturbations will then be scaled and added to the original image itself.


In [None]:
# Loss used to score the model's prediction against the one-hot label.
loss_function = tf.keras.losses.CategoricalCrossentropy()


def create_adversarial_pattern(input_image, input_label):
    """Compute the gradient of the loss with respect to the input image, and its sign.

    Args:
        input_image: image batch fed to ``pretrained_model``.
        input_label: label passed to ``loss_function`` as the true value.

    Returns:
        A ``(signed_grad, gradient)`` tuple: the element-wise sign of the
        gradient, followed by the raw gradient itself.
    """
    with tf.GradientTape() as tape:
        # Explicitly watch the input so gradients can be taken with respect to it.
        tape.watch(input_image)
        loss = loss_function(input_label, pretrained_model(input_image))

    # Gradient of the loss with respect to the input image; its sign is the perturbation.
    gradient = tape.gradient(loss, input_image)
    return tf.sign(gradient), gradient

Let's also visualize this.


In [None]:
# Build the one-hot label for the image, with a leading batch axis
# so that its shape is (1, number_of_classes).
class_idx = 388 # index of the giant_panda class
label = tf.one_hot(class_idx, image_probs.shape[-1])[None, ...]

perturbations, gradient = create_adversarial_pattern(image, label)

# The perturbation is a sign tensor; shift and scale it so it can be displayed.
plt.imshow(perturbations[0] * 0.5 + 0.5);

![noise](https://blog.paperspace.com/content/images/2022/11/noise.png)

Deciding the right ε value beforehand is quite tricky. Therefore, we'll experiment with multiple values.


In [None]:
# Perturbation strengths to try; 0 reproduces the original image.
epsilons = [0, 0.01, 0.03, 0.1, 0.15, 0.3]
descriptions = [('Epsilon = {:0.3f}'.format(eps) if eps else 'Original Image')
                for eps in epsilons]

# One panel per epsilon on a grid three columns wide.
# squeeze=False keeps `axs` two-dimensional even when there is a single row.
n_cols = 3
n_rows = -(-len(epsilons) // n_cols)  # ceiling division
fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)

for ax, eps, description in zip(axs.flat, epsilons, descriptions):
    # Always perturb the ORIGINAL image. Reassigning `image` here would make the
    # perturbations accumulate from one epsilon to the next and would make the
    # cell give different results each time it is re-run.
    adv_x = tf.clip_by_value(image + eps * perturbations, -1, 1)

    # Use a separate name for the predicted class so the one-hot `label`
    # tensor defined earlier is not overwritten.
    _, adv_class, confidence = get_imagenet_label(pretrained_model.predict(adv_x))

    ax.imshow(adv_x[0] * 0.5 + 0.5)  # map [-1, 1] to [0, 1] for display
    ax.set_title('{} \n {} : {:.2f}%'.format(description, adv_class, confidence * 100))

fig.tight_layout()
plt.show()


![adv_attack](https://blog.paperspace.com/content/images/2022/11/adv_attack.png)

As we increase the epsilon value, the misclassification becomes more pronounced, as shown by the predicted class and its confidence. The image also looks more and more perturbed. As expected, there is a trade-off between the two.