In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import os
from bisect import bisect_left
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from six.moves import xrange
import enum
import tensorflow as tf

from cleverhans.attacks import FastGradientMethod
from cleverhans.loss import CrossEntropy
from cleverhans.dataset import MNIST
from cleverhans.model import Model
from cleverhans.picklable_model import MLP, Conv2D, ReLU, Flatten, Linear, Softmax
from cleverhans.train import train
from cleverhans.utils_tf import batch_eval, model_eval
from cleverhans import serial
from pathlib import Path

from dknn import DkNNModel, get_tensorflow_session, make_basic_picklable_cnn

In [3]:
import sys

# Keep only the program name in argv. Presumably this stops the flag parser
# from seeing the extra arguments the Jupyter kernel is launched with -- confirm.
sys.argv = sys.argv[:1]

# Global flag container; the DEFINE_* calls in the next cell register on it.
FLAGS = tf.flags.FLAGS

In [4]:
# --- Training hyper-parameters ------------------------------------------------
tf.flags.DEFINE_integer('nb_epochs', 6, 'Number of epochs to train model')
tf.flags.DEFINE_integer('batch_size', 500, 'Size of training batches')
tf.flags.DEFINE_float('lr', 0.001, 'Learning rate for training')

# --- DkNN settings ------------------------------------------------------------
tf.flags.DEFINE_integer(
    'nb_cali', 750, 'Number of calibration points for the DkNN')
tf.flags.DEFINE_integer(
    'neighbors', 75, 'Number of neighbors per layer for the DkNN')
tf.flags.DEFINE_integer(
    'number_bits', 17, 'number of hash bits used by LSH Index')

# --- Session / hardware -------------------------------------------------------
tf.flags.DEFINE_float(
    'tensorflow_gpu_memory_fraction', 0.25,
    'amount of the GPU memory to allocate for a tensorflow Session')

In [5]:
# Work on a subset of MNIST: the first 10000 training and 1000 test examples.
mnist = MNIST(train_start=0, train_end=10000, test_start=0, test_end=1000)
(x_train, y_train), (x_test, y_test) = (
    mnist.get_set(split) for split in ('train', 'test'))

# Image geometry and class count are read off the loaded arrays rather than
# hard-coded, so the placeholders built later always match the data.
img_rows, img_cols, nchannels = x_train.shape[1:4]
nb_classes = y_train.shape[1]

In [None]:
# Build (or load) the CNN and wrap it in a DkNN model.
# NOTE(review): the session is closed when this `with` block exits, so anything
# that later evaluates tensors through `sess` (e.g. `get_activations`) would need
# to run inside the block -- confirm before using `dknn` in later cells.
with get_tensorflow_session() as sess:
    with tf.variable_scope('dknn'):
        # Define input TF placeholders.
        x = tf.placeholder(
            tf.float32, shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        # Fetch the complete test split from the dataset object. `y_test` is
        # re-sliced further down, so slicing from these untouched arrays keeps
        # the cell idempotent when it is executed more than once.
        x_test_full, y_test_full = mnist.get_set('test')

        # Define a model.
        model = make_basic_picklable_cnn()
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.)

        def evaluate():
            """Print the accuracy of `preds` on the complete test split."""
            acc = model_eval(sess, x, y, preds, x_test_full, y_test_full,
                             args={'batch_size': FLAGS.batch_size})
            print('Test accuracy on test examples: %0.4f' % acc)

        # Training configuration.
        train_params = {
            'nb_epochs': FLAGS.nb_epochs,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.lr,
        }

        # Reuse a previously trained model when one is cached on disk.
        data_filepath = "model.joblib"
        path = Path(data_filepath)

        if path.is_file():
            model = serial.load(data_filepath)
            # `preds` and `loss` were built from the freshly constructed,
            # untrained network above; rebuild them so they refer to the model
            # that was just loaded.
            preds = model.get_logits(x)
            loss = CrossEntropy(model, smoothing=0.)
        else:
            train(sess, loss, x_train, y_train, evaluate=evaluate,
                  args=train_params, var_list=model.get_params())
            serial.save(data_filepath, model)

        # Layers whose representations are of interest to the DkNN.
        layers = ['ReLU1', 'ReLU3', 'ReLU5', 'logits']

        def get_activations(data):
            """Return a dict mapping each name in `layers` to the flattened
            activations of that layer for `data`, evaluated in batches."""
            data_activations = {}
            for layer in layers:
                # A new flatten op is added to the graph on every call.
                layer_sym = tf.layers.flatten(model.get_layer(x, layer))
                data_activations[layer] = batch_eval(
                    sess, [x], [layer_sym], [data],
                    args={'batch_size': FLAGS.batch_size})[0]
            return data_activations

        # Use a holdout of the test set to simulate calibration data for the DkNN.
        train_data = x_train
        train_labels = np.argmax(y_train, axis=1)
        cali_data = x_test_full[:FLAGS.nb_cali]
        y_cali = y_test_full[:FLAGS.nb_cali]
        cali_labels = np.argmax(y_cali, axis=1)
        test_data = x_test_full[FLAGS.nb_cali:]
        # `y_test` keeps its original meaning after this cell (labels of the
        # non-calibration test points) but is derived from the full split.
        y_test = y_test_full[FLAGS.nb_cali:]

        # Wrap the model into a DkNNModel.
        dknn = DkNNModel(
            FLAGS.neighbors,
            layers,
            get_activations,
            train_data,
            train_labels,
            nb_classes,
            scope='dknn'
        )





Instructions for updating:
keep_dims is deprecated, use keepdims instead

Instructions for updating:
dim is deprecated, use axis instead
num_devices:  1










[INFO 2019-10-29 16:15:28,553 cleverhans] Epoch 0 took 16.651572942733765 seconds


Test accuracy on test examples: 0.8890


In [None]:
from utils_kernel import euclidean_kernel, hard_geodesics_euclidean_kernel
from utils_visualization import plot_kernel

In [None]:
# Flatten the first 10000 training images into rows of 28*28 values and take
# the matching integer labels.
hola_x = np.reshape(x_train[:10000], (10000, 28 * 28))
hola_y = train_labels[:10000]
hola_x

In [None]:
# Kernel matrix over the flattened images.
euclidean_matrix = euclidean_kernel(hola_x)

# Overwrite every exact-zero entry with a value above the current maximum, in
# place. Presumably this stops a point (zero distance to itself) from ranking
# as its own nearest neighbour later on -- confirm.
max_distance = 1 + np.max(euclidean_matrix)
np.putmask(euclidean_matrix, euclidean_matrix == 0, max_distance)

plot_kernel(euclidean_matrix)

In [None]:
# Geodesic counterpart of the kernel matrix.
# NOTE(review): the meaning of the second argument (5) is defined in
# utils_kernel and is not visible here -- confirm.
geodesic_euclidean_matrix = hard_geodesics_euclidean_kernel(hola_x, 5)

# Overwrite every exact-zero entry with a value above the current maximum, in
# place, mirroring what is done for the Euclidean matrix.
max_distance = 1 + np.max(geodesic_euclidean_matrix)
np.putmask(geodesic_euclidean_matrix, geodesic_euclidean_matrix == 0, max_distance)

plot_kernel(geodesic_euclidean_matrix)

In [None]:
def _same_class_rate(distance_matrix, labels, k):
    """Mean fraction of each point's k smallest-distance entries sharing its label.

    Args:
        distance_matrix: square array; row i holds the distances from point i.
        labels: 1-D array of integer class labels, one per row.
        k: number of neighbours to inspect per point.

    Returns:
        The per-point same-label fraction averaged over all points.
    """
    n_samples = len(labels)
    total = 0
    for i in range(n_samples):
        # argpartition places the k smallest entries of the row in the first
        # k positions (in arbitrary order), which is all that is needed here.
        neighbors_idx = np.argpartition(distance_matrix[i, :], k)[:k]
        total += np.mean(labels[i] == labels[neighbors_idx])
    return total / n_samples


# For every neighbourhood size, compare how label-pure the neighbourhoods are
# under the Euclidean and the geodesic distance matrices.
ks = range(1, 20)
same_class_euclidean = np.zeros(len(ks))
same_class_geodesic = np.zeros(len(ks))
for j, k in enumerate(ks):
    same_class_euclidean[j] = _same_class_rate(euclidean_matrix, hola_y, k)
    same_class_geodesic[j] = _same_class_rate(geodesic_euclidean_matrix, hola_y, k)
    print(j)  # progress indicator

In [None]:
# Plot neighbourhood label purity against k for both distance matrices.
# The x-axis reuses `ks` so it cannot drift from the values actually evaluated,
# and the curves are labelled so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(ks, same_class_euclidean, label='Euclidean')
ax.plot(ks, same_class_geodesic, label='Geodesic')
ax.set_xlabel('k (number of nearest neighbours)')
ax.set_ylabel('Mean fraction of neighbours with the same label')
ax.set_title('Neighbourhood label agreement vs. k')
ax.legend()
plt.show()

In [None]:
import pickle

In [None]:
# Relative path of the pickled result file opened in the next cell.
# NOTE(review): the file name suggests geodesic matrices for 1000 points with
# parameter 5, which differs from the 10000 points used above -- confirm.
matrix_path = '../results/geodesic_matrices_1000_5.pkl'

In [None]:
# Load the pickled object stored at `matrix_path` into `hola`.
# Unpickling can execute arbitrary code, so only open files produced by this
# project.
with open(matrix_path, 'rb') as f:
    hola = pickle.load(f)