In [1]:
%matplotlib inline

from scipy.misc     import imsave
from keras          import metrics
from PIL            import Image

from keras import layers
from keras.models                      import Model, load_model, Sequential
from keras.applications.vgg16          import VGG16
from keras.applications.vgg16          import decode_predictions
from keras.utils.np_utils              import to_categorical

import keras.backend     as K
import numpy             as np
import matplotlib.pyplot as plt

from cleverhans.utils_keras import KerasModelWrapper
from cleverhans.attacks import FastGradientMethod

Using TensorFlow backend.


In [2]:
import warnings

warnings.filterwarnings('ignore')

In [3]:
def limit_mem():
    cfg                          = K.tf.ConfigProto()
    cfg.gpu_options.allow_growth = True
    K.set_session(K.tf.Session(config = cfg))

In [4]:
limit_mem()

In [5]:
def create_title(category, proba):
    return '"%s" %.1f%% confidence' % (category.replace('_', ' '), proba * 100) 

In [6]:
def generate_title(model, array):
    prediction = model.predict(array)
    _, category, proba = decode_predictions(prediction)[0][0]
    
    return create_title(category, proba)

In [7]:
filename = 'images/stop2.jpeg'

# Goal

The goal of this notebook is to implement the "fast gradient sign method" presented in [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572). This method is used to modify classical samples that a deep neural network trained classification will fail to classify properly.

![fgsm idea](http://karpathy.github.io/assets/break/breakconv.png "Fast Gradient Sign Method")

The idea of this method is to take a sample, ask the network to classify it, compute the gradient of the loss in function of the input pixels and update the picture by a small amount in the direction of the gradient. This direction is opposite to the one that would increase the score for the correct class.

# VGG loading

In [8]:
original_pic   = Image.open(filename).resize((48, 48))
original_array = np.expand_dims(np.array(original_pic), 0)
print(original_array.shape)
original_pic

(1, 48, 48, 3)


<PIL.Image.Image image mode=RGB size=48x48 at 0x7F13642BB908>

We transform the image according to the VGG16 requirements. It consists in substracting the mean value for each channel and reversing the order of the channels. 

In [9]:
imagenet_mean      = np.array([123.68, 116.779, 103.939], dtype = np.float32)
#preprocess         = lambda x: (x - imagenet_mean)[:, :, :, ::-1]
#deprocess          = lambda x: (x[:, :, :, ::-1] + imagenet_mean)

preprocess         = lambda x: (x - imagenet_mean)
deprocess          = lambda x: (x + imagenet_mean)
preprocessed_array = preprocess(original_array)
#preprocessed_array = preprocessed_array.reshape(1,3,48,48)
print(preprocessed_array.shape)
model              = load_model('model_pur_transfert_VGG16_roadsigns.h5')

model.summary()


(1, 48, 48, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 48, 48, 32)        896       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 46, 46, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 23, 23, 32)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 23, 23, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 21, 21, 64)        18496     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 19, 19, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 9, 9, 64)          0     

Now let's ask the network what he sees in the picture. 

In [10]:
#pred = model.predict(preprocessed_array).argmax()
pred = model.predict(original_array).argmax()
#print(*[p[1:] for p in decode_predictions(pred)[0]], sep = '\n')
print(pred)

14


# Adversarial example generation

Now we try to confuse the network by modifying the brocoli picture. We first compute the derivatives of the loss function according to the pixels of the image.

In [11]:
target_idx      = model.predict(original_array).argmax()
target          = to_categorical(target_idx, 43)
target_variable = K.variable(target)
loss            = metrics.categorical_crossentropy(model.output, target_variable)
gradients       = K.gradients(loss, model.input)
get_grad_values = K.function([model.input], gradients)
grad_values     = get_grad_values([original_array])[0]



As mentioned in the article, we will just use the signs of the derivatives.

In [12]:
grad_signs = np.sign(grad_values)

Now we multiply these values by a very small number and add them to the pixel values of the original image. The conversion to uint8 is necessary to get a proper display using imshow ([Stackoverflow](https://stackoverflow.com/questions/39925420/bizzare-matplotlib-behaviour-in-displaying-images-cast-as-floats)). We also clip the values to the interval [0, 255] to get a valid image.

In [13]:
epsilon         = 4
perturbation    = grad_signs * epsilon
#modified_array  = preprocessed_array + perturbation
modified_array  = preprocessed_array + perturbation
deprocess_array = np.clip(deprocess(modified_array), 0., 255.).astype(np.uint8)

In [15]:
fgsm_params = {
      'eps': 0.3,
      'y': 14,
      'y_target': 10,
      'clip_min': 0.,
      'clip_max': 1.
  }

# Initialize the Fast Gradient Sign Method (FGSM) attack object
wrap = KerasModelWrapper(model)
fgsm = FastGradientMethod(wrap)

fgsm = FastGradientMethod(wrap, **fgsm_params)
adv_clever = fgsm.generate(original_array)
pred_adv_clever = model.predict(adv_clever)

ValueError: Layer model_1 was called with an input that isn't a symbolic tensor. Received type: <type 'numpy.ndarray'>. Full input: [array([[[[127, 126, 132],
         [254, 255, 253],
         [246, 253, 245],
         ..., 
         [111, 108,  93],
         [ 94,  96,  59],
         [ 84,  86,  49]],

        [[255, 255, 255],
         [253, 255, 252],
         [116, 121, 117],
         ..., 
         [210, 214, 200],
         [118, 115,  84],
         [ 89,  93,  58]],

        [[247, 249, 246],
         [249, 251, 250],
         [ 89,  88,  93],
         ..., 
         [149, 139, 137],
         [148, 143, 114],
         [140, 142, 102]],

        ..., 
        [[ 66,  70,  53],
         [ 89,  88,  83],
         [ 53,  44,  47],
         ..., 
         [154, 142, 144],
         [ 82,  81,  76],
         [141, 143, 130]],

        [[ 61,  65,  66],
         [140, 139, 135],
         [108, 101,  93],
         ..., 
         [139, 129, 130],
         [144, 145, 140],
         [127, 124, 107]],

        [[148, 136, 146],
         [101,  92,  93],
         [114, 111, 106],
         ..., 
         [122, 113, 114],
         [110, 110, 102],
         [ 53,  46,  36]]]], dtype=uint8)]. All inputs to the layer should be tensors.

In [None]:
#title_original     = generate_title(model, preprocessed_array)
#title_perturbation = generate_title(model, perturbation)
#title_modified     = generate_title(model, modified_array)

In [None]:
plt.figure(figsize = (17, 17))
plt.subplot(1, 3, 1)
plt.imshow(original_array[0])
#plt.title(title_original)
plt.subplot(1, 3, 2)
plt.imshow(perturbation[0])
#plt.title(title_perturbation)
plt.subplot(1, 3, 3)
plt.imshow(deprocess_array[0])
#plt.title(title_modified)

In [None]:
print('mean value of perturbation:', perturbation.mean())
pred = model.predict(modified_array).argmax()
print(pred)
#print(*[p[1:] for p in decode_predictions(pred)[0]], sep = '\n')