In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import numpy as np
import pandas as pd
import logging
import keras
from keras import backend
import tensorflow as tf
from tensorflow.python.platform import flags

from cleverhans.utils import set_log_level
from cleverhans.utils_mnist import data_mnist
from cleverhans.utils_tf import model_train, model_eval
from cleverhans.attacks import FastGradientMethod
from cleverhans.utils import AccuracyReport
from cleverhans.utils_keras import cnn_model
from cleverhans.utils_keras import KerasModelWrapper

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# General setup
set_log_level(logging.DEBUG)
rng = np.random.RandomState([2017, 8, 30])
accuracies = {}

n_epochs = 4 # 10
batch_size = 128
learning_rate = 0.001

train_params = {
    'nb_epochs': n_epochs,
    'batch_size': batch_size,
    'learning_rate': learning_rate
}

In [4]:
# Setup dataset
train_start, train_end = 0, 60000
test_start, test_end = 0, 10000
holdout = 150

# Get MNIST data
X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                              train_end=train_end,
                                              test_start=test_start,
                                              test_end=test_end)

# Initialize substitute training set reserved for adversary
X_sub = X_test[:holdout]
Y_sub = np.argmax(Y_test[:holdout], axis=1)

# Redefine test set as remaining samples unavailable to adversaries
X_test = X_test[holdout:]
Y_test = Y_test[holdout:]

Extracting /tmp/train-images-idx3-ubyte.gz
Extracting /tmp/train-labels-idx1-ubyte.gz
Extracting /tmp/t10k-images-idx3-ubyte.gz
Extracting /tmp/t10k-labels-idx1-ubyte.gz
X_train shape: (60000, 28, 28, 1)
X_test shape: (10000, 28, 28, 1)


In [5]:
def test(sess, X_test, Y_test, x, y, fd_model_preds, attack_preds, batch_size=128):
    eval_params = {'batch_size': batch_size}
    fd_accuracy = model_eval(sess, x, y, fd_model_preds, X_test, Y_test, args=eval_params)
    fd_attack_accuracy = model_eval(sess, x, y, attack_preds, X_test, Y_test, args=eval_params)
    return fd_accuracy, fd_attack_accuracy

In [6]:
# Let's train three different models to evaluate

In [12]:
# model_1 - trained on only 1000 examples (should suck)
g_1 = tf.Graph()
with g_1.as_default():

    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    with tf.Session() as sess_1:

        model_1 = cnn_model()
        preds_model_1 = model_1(x)
        model_train(sess_1, x, y, preds_model_1, X_train[:1000], Y_train[:1000], args=train_params, rng=rng)

[INFO 2018-03-17 15:01:49,104 cleverhans] Epoch 0 took 0.5632290840148926 seconds
[INFO 2018-03-17 15:01:49,760 cleverhans] Epoch 1 took 0.6550502777099609 seconds
[INFO 2018-03-17 15:01:50,336 cleverhans] Epoch 2 took 0.5753214359283447 seconds
[INFO 2018-03-17 15:01:50,897 cleverhans] Epoch 3 took 0.5603077411651611 seconds
[INFO 2018-03-17 15:01:50,897 cleverhans] Completed model training.


In [8]:
# model_2 - trained on all 60000 examples (should be good)
g_2 = tf.Graph()
with g_2.as_default():
    
    with tf.Session() as sess_2:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        model_2 = cnn_model()
        preds_model_2 = model_2(x)
        model_train(sess_2, x, y, preds_model_2, X_train, Y_train, args=train_params, rng=rng)

[INFO 2018-03-17 14:54:06,279 cleverhans] Epoch 0 took 32.39285731315613 seconds
[INFO 2018-03-17 14:54:38,447 cleverhans] Epoch 1 took 32.161863565444946 seconds
[INFO 2018-03-17 14:55:10,843 cleverhans] Epoch 2 took 32.391605615615845 seconds
[INFO 2018-03-17 14:55:43,061 cleverhans] Epoch 3 took 32.210278034210205 seconds
[INFO 2018-03-17 14:55:43,062 cleverhans] Completed model training.


In [9]:
# model_3 - adversarially trained on all 60000 examples (should be super good)
g_3 = tf.Graph()
with g_3.as_default():
    
    with tf.Session() as sess_3:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        model_3 = cnn_model()
        preds_model_3 = model_3(x)
        wrap = KerasModelWrapper(model_3)
        fgsm = FastGradientMethod(wrap, sess=sess_3)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        preds_model_3_adv = model_3(fgsm.generate(x, **fgsm_params))
        model_train(sess_3, x, y, preds_model_3, X_train, Y_train, predictions_adv=preds_model_3_adv, args=train_params, rng=rng)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


[INFO 2018-03-17 14:56:54,468 cleverhans] Epoch 0 took 70.9706621170044 seconds
[INFO 2018-03-17 14:58:05,331 cleverhans] Epoch 1 took 70.85821986198425 seconds
[INFO 2018-03-17 14:59:15,716 cleverhans] Epoch 2 took 70.37824511528015 seconds
[INFO 2018-03-17 15:00:26,091 cleverhans] Epoch 3 took 70.36933493614197 seconds
[INFO 2018-03-17 15:00:26,091 cleverhans] Completed model training.


In [13]:
# Whitebox testing
accuracies = pd.DataFrame(columns=['dataset', 'attack_name', 'clean_accuracy', 'corrupted_accuracy'])
for i, (g, sess, model, model_preds) in enumerate([(g_1, sess_1, model_1, preds_model_1), (g_2, sess_2, model_2, preds_model_2), (g_3, sess_3, model_3, preds_model_3)]):
    with g.as_default():
        fgsm = FastGradientMethod(KerasModelWrapper(model), sess=sess)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        attack_preds = model(fgsm.generate(x, **fgsm_params))
        fd_accuracy, fd_attack_accuracy = test(sess, X_test, Y_test, x, y, model_preds, attack_preds)
        accuracies.loc['model_{}'.format(i)] = ('mnist_standard_split', 'whitebox_fgsm', fd_accuracy, fd_attack_accuracy)
    print(accuracies)

RuntimeError: Attempted to use a closed Session.

In [None]:
accuracies