In [43]:
import pandas
import numpy
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
"""
This tutorial shows how to generate adversarial examples
using FGSM in black-box setting.
The original paper can be found at:
https://arxiv.org/abs/1602.02697
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
from six.moves import xrange

import logging
import tensorflow as tf
from tensorflow.python.platform import flags

from cleverhans.utils_mnist import data_mnist
from cleverhans.utils import to_categorical
from cleverhans.utils import set_log_level
from cleverhans.utils_tf import model_train, model_eval, batch_eval
from cleverhans.attacks import FastGradientMethod
from cleverhans.attacks_tf import jacobian_graph, jacobian_augmentation

from cleverhans_tutorials.tutorial_models import make_basic_cnn, MLP
from cleverhans_tutorials.tutorial_models import Flatten, Linear, ReLU, Softmax
from cleverhans.utils import TemporaryLogLevel

FLAGS = flags.FLAGS

# Functions

## Data

In [160]:
'''
MOONS
'''
def get_moon():
    X, y = make_moons(noise=0.3, random_state=1, n_samples=10000)
    y2 = np.zeros((X.shape[0],2))
    for k in range(len(y)):
        y2[k][y[k]] = 1
    return X, y2
DATASETS_ = {'moons':get_moon}

## Training a black-box

In [141]:
'''
PAPERNOT BB
'''
def Papernot_bbox(sess, x, y, X_train, Y_train, X_test, Y_test,
              nb_epochs, batch_size, learning_rate,
              rng):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the ouput placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return:
    """

    # Define TF model graph (for the black-box model)
    model = make_basic_cnn()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    model_train(sess, x, y, predictions, X_train, Y_train,
                args=train_params, rng=rng)

    # Print out the accuracy on legitimate data
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return model, predictions, accuracy

def RF_bbox(X_train, Y_train, X_test, Y_test):
    # Define RF model graph (for the black-box model)

    model = RandomForestClassifier(n_estimators=100, n_jobs=-1).fit(X_train, Y_train)
    
    # Print out the accuracy on legitimate data
    predictions = model.predict_proba(X_test)[1]
    
    accuracy = accuracy_score(Y_test, model.predict(X_test))
    #roc_auc = roc_auc_score(Y_test, predictions[1][:,1])
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))
    #print('Test ROC AUC of black-box on legitimate test ' 'examples: ' + str(roc_auc))
    return model, predictions, accuracy
    
BB_MODELS_ = {'dnn': PAP_bbox,
            'rf': RF_bbox}

## Papernot Surrogate

In [188]:
def setup_tutorial():
    """
    Helper function to check correct configuration of tf for tutorial
    :return: True if setup checks completed
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    return True
def substitute_model(img_rows=1, img_cols=2, nb_classes=2):
    """
    Defines the model architecture to be used by the substitute. Use
    the example model interface.
    :param img_rows: number of rows in input
    :param img_cols: number of columns in input
    :param nb_classes: number of classes in output
    :return: tensorflow model
    """
    input_shape = (None, img_rows, img_cols, 1)

    # Define a fully connected model (it's different than the black-box)
    layers = [Flatten(),
              Linear(200),
              ReLU(),
              Linear(200),
              ReLU(),
              Linear(nb_classes),
              Softmax()]
    layers = [Flatten(), Linear(nb_classes), Softmax()]

    return MLP(layers, input_shape)


def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes,
              nb_epochs_s, batch_size, learning_rate, data_aug, lmbda,
              rng):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :param rng: numpy.random.RandomState instance
    :return:
    """
    # Define TF model graph (for the black-box model)
    model_sub = substitute_model()
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively
    for rho in xrange(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        with TemporaryLogLevel(logging.WARNING, "cleverhans.utils.tf"):
            model_train(sess, x, y, preds_sub, X_sub,
                        to_categorical(Y_sub, nb_classes),
                        init_all=False, args=train_params, rng=rng)

        # If we are not at last substitute training iteration, augment dataset
        if rho < data_aug - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            lmbda_coef = 2 * int(int(rho / 3) != 0) - 1
            X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                          lmbda_coef * lmbda)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            Y_sub = np.hstack([Y_sub, Y_sub])
            X_sub_prev = X_sub[int(len(X_sub)/2):]
            eval_params = {'batch_size': 200}#ok je donnerai 
            
            bbox_preds = tf.convert_to_tensor(bbox_preds, dtype=tf.float32)            
            print('x sub prev', X_sub_prev.shape)
            bbox_val = batch_eval2(sess, [x], [bbox_preds], [X_sub_prev], args=eval_params)[0]
            
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            Y_sub[int(len(X_sub)/2):] = np.argmax(bbox_val, axis=1)

    return model_sub, preds_sub

In [187]:
main_fidelity()

Preparing the black-box model.
Test accuracy of black-box on legitimate test examples: 0.8955
Training the substitute model.
Defined TensorFlow model graph for the substitute.
Substitute training epoch #0


[INFO 2018-06-21 16:45:37,295 cleverhans] Epoch 0 took 0.12337374687194824 seconds
INFO:cleverhans:Epoch 0 took 0.12337374687194824 seconds
[INFO 2018-06-21 16:45:37,296 cleverhans] Completed model training.
INFO:cleverhans:Completed model training.


Augmenting substitute training data.
Labeling substitute training data.
x sub prev (10, 2)
start 0
end 200
input batches [array([[ 0.60280051,  0.76102675],
       [-0.85730031, -0.01678787],
       [-1.23908612,  0.50445917],
       [-0.4489277 ,  0.75046783],
       [ 1.25778821, -0.00768521],
       [ 0.88444481,  0.18134661],
       [ 0.33922971, -0.07833283],
       [ 0.10649866,  0.91411409],
       [-0.6601705 ,  0.16874348],
       [ 0.96139662,  0.56414275]])]
feed_dict {<tf.Tensor 'Placeholder_168:0' shape=(?, 2) dtype=float32>: array([[ 0.60280051,  0.76102675],
       [-0.85730031, -0.01678787],
       [-1.23908612,  0.50445917],
       [-0.4489277 ,  0.75046783],
       [ 1.25778821, -0.00768521],
       [ 0.88444481,  0.18134661],
       [ 0.33922971, -0.07833283],
       [ 0.10649866,  0.91411409],
       [-0.6601705 ,  0.16874348],
       [ 0.96139662,  0.56414275]])}
[[ 1.  0.]
 [ 1.  0.]
 [ 1.  0.]
 ..., 
 [ 1.  0.]
 [ 0.  1.]
 [ 1.  0.]]
(2000, 2)
10


AssertionError: (2000, 2)

In [184]:

def batch_eval2(sess, tf_inputs, tf_outputs, numpy_inputs, feed=None,
               args=None):
    """
    A helper function that computes a tensor on numpy inputs by batches.
    :param sess:
    :param tf_inputs:
    :param tf_outputs:
    :param numpy_inputs:
    :param feed: An optional dictionary that is appended to the feeding
             dictionary before the session runs. Can be used to feed
             the learning phase of a Keras model for instance.
    :param args: dict or argparse `Namespace` object.
                 Should contain `batch_size`
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"

    n = len(numpy_inputs)
    assert n > 0
    assert n == len(tf_inputs)
    m = numpy_inputs[0].shape[0]
    for i in xrange(1, n):
        assert numpy_inputs[i].shape[0] == m
    out = []
    for _ in tf_outputs:
        out.append([])
    with sess.as_default():
        for start in xrange(0, m, args.batch_size):
            batch = start // args.batch_size
            if batch % 100 == 0 and batch > 0:
                _logger.debug("Batch " + str(batch))

            # Compute batch start and end indices
            start = batch * args.batch_size
            end = start + args.batch_size
            print('start', start)
            print('end', end)
            numpy_input_batches = [numpy_input[start:end]
                                   for numpy_input in numpy_inputs]
            print('input batches', numpy_input_batches.shape)
            cur_batch_size = numpy_input_batches[0].shape[0]
            assert cur_batch_size <= args.batch_size
            
            for e in numpy_input_batches:
                assert e.shape[0] == cur_batch_size

            feed_dict = dict(zip(tf_inputs, numpy_input_batches))
            print('feed_dict', feed_dict)
            if feed is not None:
                feed_dict.update(feed)
            numpy_output_batches = sess.run(tf_outputs, feed_dict=feed_dict) #PROBLEME ICI : AU LIEU DES BATCHES, TOUT LE DATASET DE PRED
            for e in numpy_output_batches:
                print(e)
                print(e.shape)
                print(cur_batch_size)
                assert e.shape[0] == cur_batch_size, e.shape #ERREUR ICI
            for out_elem, numpy_output_batch in zip(out, numpy_output_batches):
                out_elem.append(numpy_output_batch)

    out = [np.concatenate(x, axis=0) for x in out]
    for e in out:
        assert e.shape[0] == m, e.shape
    return out

class _ArgsWrapper(object):

    """
    Wrapper that allows attribute access to dictionaries
    """

    def __init__(self, args):
        if not isinstance(args, dict):
            args = vars(args)
        self.args = args

    def __getattr__(self, name):
        return self.args.get(name)

Usage: 
print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                              nb_classes, nb_epochs_s, batch_size,
                              learning_rate, data_aug, lmbda, rng=rng)
    model_sub, preds_sub = train_sub_out

# Our surrogate

NameError: name 'X' is not defined

# Local Fidelity

In [None]:
acc = model_eval(sess, x, y, preds_sub, X_test, Y_test, args=eval_params)1



# Framework

In [116]:

def main_fidelity():
    # Trucs que je comprends pas
    set_log_level(logging.DEBUG)
    assert setup_tutorial()
    sess = tf.Session()
    
    accuracies = {}
    X, Y = DATASETS_['moons']()
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
    X_sub = X_test[:holdout]
    Y_sub = np.argmax(Y_test[:holdout], axis=1)
    '''
    Pour PAPERNOT + blackbox si DNN (jy connais évidemment rien à tensorflow)
    '''
    '''# Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[holdout:]
    Y_test = Y_test[holdout:]'''
    # Define input and output TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y = tf.placeholder(tf.float32, shape=(None, 2))
    
    # Seed random number generator so tutorial is reproducible
    rng = np.random.RandomState([2017, 8, 30])
    
    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    prep_bbox_out = BB_MODELS_['rf'](X_train, Y_train, X_test, Y_test) #(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs, batch_size, learning_rate, rng) #ici marche pas mais de toute facon on veut pas le rendre generique on veut juste choisir un clf
    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train PAPERNOT substitute
    print("Training the substitute model.")
    train_sub_out = train_sub(sess, x, y, bbox_preds, X_sub, Y_sub,
                              nb_classes, nb_epochs_s, batch_size,
                              learning_rate, data_aug, lmbda, rng=rng)
    model_sub, preds_sub = train_sub_out
    
    
    # Train OUR subtitute
    
    
    # Evaluate the Papernot substitute model on clean test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_sub, X_test, Y_test, args=eval_params)
    accuracies['sub'] = acc
    return acc
nb_classes=2 #
batch_size=2 #FLAGS.batch_size
learning_rate=0.001 #FLAGS.learning_rate
nb_epochs=1 #FLAGS.nb_epochs, 
holdout=10 #FLAGS.holdout
data_aug=3 #FLAGS.data_aug, 
nb_epochs_s=1 #FLAGS.nb_epochs_s,
lmbda=.01 #FLAGS.lmbda

main_fidelity()

Preparing the black-box model.
Test accuracy of black-box on legitimate test examples: 0.88
Training the substitute model.
Defined TensorFlow model graph for the substitute.
Substitute training epoch #0


[INFO 2018-06-21 15:25:24,045 cleverhans] Epoch 0 took 0.06192493438720703 seconds
INFO:cleverhans:Epoch 0 took 0.06192493438720703 seconds
[INFO 2018-06-21 15:25:24,046 cleverhans] Completed model training.
INFO:cleverhans:Completed model training.


Augmenting substitute training data.
Labeling substitute training data.


TypeError: Fetch argument array([[ 0.  ,  1.  ],
       [ 0.  ,  1.  ],
       [ 0.48,  0.52],
       [ 1.  ,  0.  ],
       [ 0.14,  0.86],
       [ 0.05,  0.95],
       [ 0.95,  0.05],
       [ 1.  ,  0.  ],
       [ 0.69,  0.31],
       [ 0.  ,  1.  ],
       [ 0.53,  0.47],
       [ 0.03,  0.97],
       [ 0.96,  0.04],
       [ 0.89,  0.11],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 0.16,  0.84],
       [ 0.  ,  1.  ],
       [ 0.11,  0.89],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.17,  0.83],
       [ 0.19,  0.81],
       [ 0.  ,  1.  ],
       [ 0.01,  0.99],
       [ 0.11,  0.89],
       [ 1.  ,  0.  ],
       [ 1.  ,  0.  ],
       [ 0.01,  0.99],
       [ 0.64,  0.36],
       [ 0.5 ,  0.5 ],
       [ 0.  ,  1.  ],
       [ 0.69,  0.31],
       [ 0.29,  0.71],
       [ 1.  ,  0.  ],
       [ 0.02,  0.98],
       [ 0.99,  0.01],
       [ 1.  ,  0.  ],
       [ 0.56,  0.44],
       [ 0.  ,  1.  ],
       [ 0.06,  0.94],
       [ 0.01,  0.99],
       [ 0.  ,  1.  ],
       [ 0.03,  0.97],
       [ 0.97,  0.03],
       [ 0.07,  0.93],
       [ 0.03,  0.97],
       [ 1.  ,  0.  ],
       [ 0.8 ,  0.2 ],
       [ 0.67,  0.33],
       [ 0.99,  0.01],
       [ 0.05,  0.95],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 0.99,  0.01],
       [ 0.11,  0.89],
       [ 0.12,  0.88],
       [ 0.01,  0.99],
       [ 0.97,  0.03],
       [ 1.  ,  0.  ],
       [ 0.02,  0.98],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.06,  0.94],
       [ 0.38,  0.62],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 0.92,  0.08],
       [ 0.21,  0.79],
       [ 1.  ,  0.  ],
       [ 0.98,  0.02],
       [ 0.93,  0.07],
       [ 0.88,  0.12],
       [ 0.03,  0.97],
       [ 0.99,  0.01],
       [ 0.  ,  1.  ],
       [ 0.47,  0.53],
       [ 1.  ,  0.  ],
       [ 0.09,  0.91],
       [ 1.  ,  0.  ],
       [ 0.3 ,  0.7 ],
       [ 0.97,  0.03],
       [ 0.71,  0.29],
       [ 0.99,  0.01],
       [ 0.58,  0.42],
       [ 0.87,  0.13],
       [ 0.03,  0.97],
       [ 0.94,  0.06],
       [ 0.14,  0.86],
       [ 0.75,  0.25],
       [ 0.  ,  1.  ],
       [ 0.19,  0.81],
       [ 0.06,  0.94],
       [ 1.  ,  0.  ],
       [ 0.74,  0.26],
       [ 1.  ,  0.  ],
       [ 0.97,  0.03],
       [ 0.99,  0.01],
       [ 0.99,  0.01],
       [ 0.97,  0.03],
       [ 0.02,  0.98],
       [ 0.01,  0.99],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.99,  0.01],
       [ 0.  ,  1.  ],
       [ 0.  ,  1.  ],
       [ 0.11,  0.89],
       [ 0.15,  0.85],
       [ 0.14,  0.86],
       [ 0.99,  0.01],
       [ 0.66,  0.34],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.75,  0.25],
       [ 0.  ,  1.  ],
       [ 0.32,  0.68],
       [ 0.02,  0.98],
       [ 0.01,  0.99],
       [ 0.  ,  1.  ],
       [ 0.85,  0.15],
       [ 0.  ,  1.  ],
       [ 0.01,  0.99],
       [ 1.  ,  0.  ],
       [ 0.01,  0.99],
       [ 0.01,  0.99],
       [ 0.93,  0.07],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.44,  0.56],
       [ 0.98,  0.02],
       [ 0.01,  0.99],
       [ 0.17,  0.83],
       [ 0.08,  0.92],
       [ 0.02,  0.98],
       [ 1.  ,  0.  ],
       [ 0.99,  0.01],
       [ 0.95,  0.05],
       [ 0.21,  0.79],
       [ 0.96,  0.04],
       [ 0.  ,  1.  ],
       [ 0.  ,  1.  ],
       [ 0.74,  0.26],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 0.85,  0.15],
       [ 0.  ,  1.  ],
       [ 0.98,  0.02],
       [ 0.  ,  1.  ],
       [ 0.32,  0.68],
       [ 1.  ,  0.  ],
       [ 1.  ,  0.  ],
       [ 0.2 ,  0.8 ],
       [ 0.01,  0.99],
       [ 0.91,  0.09],
       [ 0.07,  0.93],
       [ 0.  ,  1.  ],
       [ 0.5 ,  0.5 ],
       [ 0.  ,  1.  ],
       [ 0.43,  0.57],
       [ 0.03,  0.97],
       [ 0.7 ,  0.3 ],
       [ 0.  ,  1.  ],
       [ 0.2 ,  0.8 ],
       [ 0.99,  0.01],
       [ 0.  ,  1.  ],
       [ 0.64,  0.36],
       [ 0.12,  0.88],
       [ 0.53,  0.47],
       [ 0.93,  0.07],
       [ 0.  ,  1.  ],
       [ 0.09,  0.91],
       [ 0.9 ,  0.1 ],
       [ 0.03,  0.97],
       [ 0.97,  0.03],
       [ 0.63,  0.37],
       [ 0.02,  0.98],
       [ 0.33,  0.67],
       [ 0.3 ,  0.7 ],
       [ 0.  ,  1.  ],
       [ 0.9 ,  0.1 ],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.94,  0.06],
       [ 0.95,  0.05],
       [ 1.  ,  0.  ],
       [ 0.  ,  1.  ],
       [ 1.  ,  0.  ],
       [ 0.04,  0.96],
       [ 0.87,  0.13],
       [ 1.  ,  0.  ],
       [ 1.  ,  0.  ],
       [ 0.03,  0.97],
       [ 0.1 ,  0.9 ],
       [ 0.83,  0.17]]) has invalid type <class 'numpy.ndarray'>, must be a string or Tensor. (Can not convert a ndarray into a Tensor or Operation.)