## MNIST

In this first section, the base DeepFool attack is demonstrated on the MNIST dataset.

In [1]:
# Python 2/3 compatibility shims; a __future__ import must stay the first statement of the cell.
from __future__ import absolute_import, division, print_function, unicode_literals

import sys
# Put the parent directory on the import path; presumably that is where the
# local `algorithms` package imported at the bottom of this cell lives — TODO confirm.
sys.path.append("..")

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import numpy as np

from sklearn.metrics import classification_report

import keras
from keras import metrics
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, Flatten
from keras.models import Model, load_model
import keras.backend as k
from matplotlib import pyplot as plt
from IPython.display import clear_output

from art.config import ART_DATA_PATH
from art.estimators.classification import KerasClassifier
from art.utils import to_categorical, load_dataset, get_file

import warnings
# Silences every Python warning for the rest of the session, including
# deprecation warnings that may be relevant when upgrading dependencies.
warnings.filterwarnings('ignore')
import tensorflow as tf
# Switch TensorFlow to graph (non-eager) mode and restrict its logging to errors.
# NOTE(review): presumably graph mode is required by the ART Keras wrapper used below — confirm.
tf.compat.v1.disable_eager_execution()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Project-local DeepFool implementation (not the one shipped with ART).
from algorithms.deepfool import DeepFool

In [2]:
# Load MNIST together with the value range used for clipping adversarial samples.
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset('mnist')

# Fetch the pre-trained MNIST CNN into the ART data directory.
path = get_file('mnist_cnn_original.h5', extract=False, path=ART_DATA_PATH,
                url='https://www.dropbox.com/s/p2nyzne9chcerid/mnist_cnn_original.h5?dl=1')

# Metrics reported by `model.evaluate` in the cells below.
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]

classifier_model = load_model(path)
# Compile BEFORE wrapping, so the ART wrapper is built from the same loss
# configuration that `fit`/`evaluate` use afterwards.
classifier_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=METRICS)
# The network outputs class probabilities, not logits: the evaluation metrics
# thresholded at 0.5 match the argmax counts exactly, and the loss above is the
# probability form of categorical cross-entropy. Declaring `use_logits=True`
# would make the wrapper treat those probabilities as logits when it computes
# its loss, so the flag is set to match the model.
classifier = KerasClassifier(clip_values=(min_, max_), model=classifier_model, use_logits=False)
# One additional epoch of training on the clean training set.
classifier.fit(x_train, y_train, nb_epochs=1, batch_size=128, verbose=1)



In [3]:
# Print the layer-by-layer architecture of the loaded network.
classifier_model.summary()

# Class scores for the clean test images, reduced to predicted labels.
pred = classifier.predict(x_test)
x_test_pred = np.argmax(pred, axis=1)

# Ground-truth labels recovered from the one-hot targets (computed once).
y_test_labels = np.argmax(y_test, axis=1)
is_correct = x_test_pred == y_test_labels

nb_correct_pred = np.sum(is_correct)
accuracy = np.mean(is_correct)

# Loss and compiled Keras metrics on the clean test set.
base_results = classifier.model.evaluate(x_test, y_test, verbose=1)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1600)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               204928    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1

In [4]:
# Summary of the classifier's performance on the unmodified test images.
print("---Original test images---:")
print(f"Correctly classified: {nb_correct_pred}")
print(f"Accuracy on test samples: {accuracy:f}")

# Pair each compiled metric name with its value; displayed as the cell result.
{name: value for name, value in zip(classifier.model.metrics_names, base_results)}

---Original test images---:
Correctly classified: 9920
Accuracy on test samples: 0.992000


{'loss': 0.0681226589658379,
 'tp': 9920.0,
 'fp': 80.0,
 'tn': 89920.0,
 'fn': 80.0,
 'categorical_accuracy': 0.992,
 'precision': 0.992,
 'recall': 0.992,
 'auc': 0.99718785}

In [5]:
# Build the project-local DeepFool attack around the wrapped MNIST classifier.
attacker = DeepFool(classifier)
# Craft adversarial versions of the whole test set; the recorded run below took
# about 41 minutes for 10,000 images, so expect this cell to be slow.
x_test_adv = attacker.generate(x_test)

Targeted model should output logits, not probabilities for predictions.
DeepFool: 100%|██████████| 10000/10000 [41:41<00:00,  4.00it/s]


In [6]:
# Predicted labels for the adversarial images versus the true labels.
adv_scores = classifier.predict(x_test_adv)
preds = np.argmax(adv_scores, axis=1)
true_labels = np.argmax(y_test, axis=1)

# Number (and fraction) of adversarial images still classified correctly.
n_still_correct = np.sum(preds == true_labels)
acc = n_still_correct / y_test.shape[0]

# Loss and compiled Keras metrics on the adversarial test set.
adv_results = classifier.model.evaluate(x_test_adv, y_test, verbose=1)

print(n_still_correct)
{name: value for name, value in zip(classifier.model.metrics_names, adv_results)}

9305


{'loss': 0.8877479232385777,
 'tp': 9297.0,
 'fp': 682.0,
 'tn': 89318.0,
 'fn': 703.0,
 'categorical_accuracy': 0.9305,
 'precision': 0.9316565,
 'recall': 0.9297,
 'auc': 0.9677834}

## CIFAR-10

In [2]:
import logging

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout

In [3]:
# Configure the root logger to print "[LEVEL] message" lines at INFO and above.
#
# The handler is given a name and looked up before being created, so that
# re-running this cell does not attach a second StreamHandler to the root
# logger (which would print every log line twice, three times, ...).
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter("[%(levelname)s] %(message)s")

handler = next(
    (h for h in logger.handlers if h.get_name() == "deepfool-notebook"),
    None,
)
if handler is None:
    handler = logging.StreamHandler()
    handler.set_name("deepfool-notebook")
    logger.addHandler(handler)
# (Re)apply the formatter so an already-attached handler picks up any format edit.
handler.setFormatter(formatter)

In [4]:
# Load CIFAR-10 splits plus the value range used later for clipping.
train_split, test_split, min_, max_ = load_dataset("cifar10")
x_train, y_train = train_split
x_test, y_test = test_split

In [5]:
# Metrics reported by `model.evaluate` in the cells below.
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]

# Two convolutional stages (32 then 64 filters) followed by a dense head that
# ends in a 10-way softmax. Layers are listed in the order they are stacked.
model = Sequential([
    # Stage 1: 32 filters
    Conv2D(32, (3, 3), padding="same", input_shape=x_train.shape[1:]),
    Activation("relu"),
    Conv2D(32, (3, 3)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: 64 filters
    Conv2D(64, (3, 3), padding="same"),
    Activation("relu"),
    Conv2D(64, (3, 3)),
    Activation("relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classification head
    Flatten(),
    Dense(512),
    Activation("relu"),
    Dropout(0.5),
    Dense(10),
    Activation("softmax"),
])

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

In [6]:
# Wrap the compiled Keras model for ART, clipping inputs to the dataset range.
clip_range = (min_, max_)
classifier = KerasClassifier(model=model, clip_values=clip_range)

# Train silently for 15 epochs on the clean training set.
fit_options = dict(nb_epochs=15, batch_size=128, verbose=0)
classifier.fit(x_train, y_train, **fit_options)

[INFO] Inferred 17 hidden layers on Keras classifier.


In [7]:
# Predicted versus true labels on the clean test set.
clean_scores = classifier.predict(x_test)
preds = np.argmax(clean_scores, axis=1)
true_labels = np.argmax(y_test, axis=1)
acc = np.sum(preds == true_labels) / y_test.shape[0]

# Loss and compiled Keras metrics before any attack is applied.
base_results = classifier.model.evaluate(x_test, y_test, verbose=1)
print("Classifier results before attack: ")
{name: value for name, value in zip(classifier.model.metrics_names, base_results)}

Classifier results before attack: 


{'loss': 0.644457839679718,
 'tp': 7431.0,
 'fp': 1455.0,
 'tn': 88545.0,
 'fn': 2569.0,
 'categorical_accuracy': 0.7854,
 'precision': 0.8362593,
 'recall': 0.7431,
 'auc': 0.9740894}

In [8]:
# Build the project-local DeepFool attack around the trained CIFAR-10 classifier.
logger.info("Create DeepFool attack")
adv_crafter = DeepFool(classifier)
# Crafting on the training split is left disabled; only the test split is attacked below.
#logger.info("Craft attack on training examples")
#x_train_adv = adv_crafter.generate(x_train)
# Generate adversarial versions of the test images (about 24 minutes in the recorded run).
logger.info("Craft attack test examples")
x_test_adv = adv_crafter.generate(x_test)

[INFO] Create DeepFool attack
[INFO] Craft attack test examples
DeepFool: 100%|██████████| 10000/10000 [23:48<00:00,  7.00it/s] 
[INFO] DeepFool attack success rate: 88.58%


In [9]:
# Accuracy of the unchanged classifier on the adversarial test images.
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
# The classifier is NOT retrained on adversarial data anywhere in this notebook;
# the message describes what is actually measured: the original model
# evaluated on adversarial examples.
logger.info("Classifier evaluated on adversarial test examples")
logger.info("Accuracy on adversarial samples: %.2f%%", (acc * 100))
# Loss and compiled Keras metrics on the adversarial test set.
adv_results = classifier.model.evaluate(x_test_adv, y_test, verbose=1)
print("Classifier results after attack: ")
dict(zip(classifier.model.metrics_names, adv_results))

[INFO] Classifier after adversarial training
[INFO] Accuracy on adversarial samples: 16.02%


Classifier results after attack: 


{'loss': 4.734081149291992,
 'tp': 397.0,
 'fp': 5385.0,
 'tn': 84615.0,
 'fn': 9603.0,
 'categorical_accuracy': 0.1602,
 'precision': 0.06866136,
 'recall': 0.0397,
 'auc': 0.7188941}

## Extension

In this extension, FastDeepFool was implemented with a dynamic epsilon value for its overshoot correction parameter; the value decreases logarithmically as the attack progresses. This allows larger corrections in the early iterations and finer adjustments in the later ones.

In [4]:
from algorithms.dynamic_deepfool import DynamicDeepFool

In [5]:
# Reload CIFAR-10 so this section starts from untouched data.
cifar_train, cifar_test, min_, max_ = load_dataset("cifar10")
(x_train, y_train), (x_test, y_test) = cifar_train, cifar_test

In [6]:
# Metrics reported by `model.evaluate` in the cells below.
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.CategoricalAccuracy(name='categorical_accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]

model = Sequential()

# Two identical convolutional stages that differ only in filter count.
for stage_index, n_filters in enumerate((32, 64)):
    # Only the very first layer of the network declares the input shape.
    first_layer_kwargs = {"input_shape": x_train.shape[1:]} if stage_index == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), padding="same", **first_layer_kwargs))
    model.add(Activation("relu"))
    model.add(Conv2D(n_filters, (3, 3)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Dense classification head ending in a 10-way softmax.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

In [7]:
# Wrap the freshly built model for ART; inputs are clipped to the dataset range.
value_bounds = (min_, max_)
classifier = KerasClassifier(model=model, clip_values=value_bounds)

# Train for 15 epochs with per-epoch progress output.
training_options = {"nb_epochs": 15, "batch_size": 128, "verbose": 1}
classifier.fit(x_train, y_train, **training_options)

[INFO] Inferred 17 hidden layers on Keras classifier.


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [8]:
# Baseline: predicted versus true labels on the clean test images.
y_true = np.argmax(y_test, axis=1)
preds = np.argmax(classifier.predict(x_test), axis=1)
n_matches = np.sum(preds == y_true)
acc = n_matches / y_test.shape[0]

# Loss and compiled Keras metrics before the attack.
base_results = classifier.model.evaluate(x_test, y_test, verbose=1)
print("Classifier results before attack: ")
dict(zip(classifier.model.metrics_names, base_results))

Classifier results before attack: 


{'loss': 0.6376115732192993,
 'tp': 7388.0,
 'fp': 1408.0,
 'tn': 88592.0,
 'fn': 2612.0,
 'categorical_accuracy': 0.7827,
 'precision': 0.83992726,
 'recall': 0.7388,
 'auc': 0.97513795}

In [9]:
# Build the DynamicDeepFool variant around the trained classifier.
# Note that the log line still reads "DeepFool" although the dynamic variant is used.
logger.info("Create DeepFool attack")
adv_crafter = DynamicDeepFool(classifier)
# Generate adversarial versions of the test images (about 24 minutes in the recorded run).
logger.info("Craft attack on test examples")
x_test_adv = adv_crafter.generate(x_test)

[INFO] Create DeepFool attack
[INFO] Craft attack on test examples
DeepFool: 100%|██████████| 10000/10000 [24:21<00:00,  6.84it/s] 
[INFO] DeepFool attack success rate: 97.99%


In [10]:
# Accuracy of the unchanged classifier on the DynamicDeepFool adversarial images.
preds = np.argmax(classifier.predict(x_test_adv), axis=1)
acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0]
# No adversarial training takes place in this notebook; the message states
# what is actually measured: the original model evaluated on adversarial examples.
logger.info("Classifier evaluated on adversarial test examples")
logger.info("Accuracy on adversarial samples: %.2f%%", (acc * 100))
# Loss and compiled Keras metrics on the adversarial test set.
adv_results = classifier.model.evaluate(x_test_adv, y_test, verbose=1)
print("Classifier results after attack: ")
dict(zip(classifier.model.metrics_names, adv_results))

[INFO] Classifier after adversarial training
[INFO] Accuracy on adversarial samples: 14.19%


Classifier results after attack: 


{'loss': 3.283737688446045,
 'tp': 366.0,
 'fp': 4831.0,
 'tn': 85169.0,
 'fn': 9634.0,
 'categorical_accuracy': 0.1419,
 'precision': 0.07042524,
 'recall': 0.0366,
 'auc': 0.7628513}