In [1]:
from test_nn import test_model
from lp import run_lp
from os import path
from spectrum_analysis import *
from utils import save_perturbed_test_groups, load_perturbed_test_groups
from utils import load_suspicious_neurons, save_suspicious_neurons
from utils import create_experiment_dir, get_trainable_layers
from utils import load_classifications, save_classifications
from utils import save_layer_outs, load_layer_outs, construct_spectrum_matrices
from utils import load_MNIST, load_CIFAR, load_model
from utils import filter_val_set, save_original_inputs
from input_synthesis import synthesize
from sklearn.model_selection import train_test_split
import datetime
import argparse
import random
from collections import defaultdict
import matplotlib.pyplot as plt
from keras import models
from utils import generate_adversarial
from utils import get_layer_outs

Using TensorFlow backend.


In [2]:
# Seed handed to train_test_split (random_state) so the train/validation split is reproducible.
seed = 42
# Directory that exp() joins with a model name to locate the saved network.
model_path = "neural_networks"
# Default matplotlib figure size as (width, height); large because plot_exp lays out a 3x4 subplot grid.
plt.rcParams["figure.figsize"] = (36,18)

In [3]:
def exp(X, Y, model_name, actual_class, susp_num, adversarial_algo):
    """Relate Tarantula-suspicious neurons to activation shifts under adversarial inputs.

    The function filters ``(X, Y)`` with ``filter_val_set`` for ``actual_class``,
    loads the model ``model_name`` from ``model_path``, runs it on the filtered
    inputs, ranks neurons with ``tarantula_analysis`` and then generates
    adversarial versions of the correctly classified inputs. For every neuron of
    every trainable layer it records the mean difference between the activation
    on the adversarial inputs and the activation on the corresponding original
    inputs.

    Relies on ``np`` and ``model_path`` being available in the notebook
    namespace (``np`` is not imported explicitly in this file - presumably it
    arrives through ``from spectrum_analysis import *``; confirm).

    Args:
        X: Input samples; must support indexing with ``correct_classifications``.
        Y: Labels matching ``X``.
        model_name: File name of the model inside ``model_path``.
        actual_class: Class passed to ``filter_val_set`` (presumably the class
            whose samples are kept - verify against ``utils.filter_val_set``).
        susp_num: Number of suspicious neurons requested from ``tarantula_analysis``.
        adversarial_algo: Name of the attack forwarded to ``generate_adversarial``.
            For ``'fgsm'`` the call additionally passes ``eps=0.05``.

    Returns:
        A 6-element list:
        ``[suspicious_neurons, correct_classifications, misclassifications,
        layer_outs, new_layer_outs, avg_changes]`` where ``suspicious_neurons``
        is a list of ``(layer_index, neuron)`` tuples (layer positions returned
        by ``tarantula_analysis`` are translated through ``trainable_layers``)
        and ``avg_changes`` maps ``(layer_index, neuron_index)`` to the mean
        activation change.
    """
    X, Y = filter_val_set(actual_class, X, Y)
    model = load_model(path.join(model_path, model_name))

    # The fourth value returned by test_model (the predictions) is not needed here.
    correct_classifications, misclassifications, layer_outs, _ = test_model(model, X, Y)

    trainable_layers = get_trainable_layers(model)
    scores, num_cf, num_uf, num_cs, num_us = construct_spectrum_matrices(model,
                                                                        trainable_layers,
                                                                        correct_classifications,
                                                                        misclassifications,
                                                                        layer_outs)

    suspicious_neuron_idx = tarantula_analysis(trainable_layers, scores,
                                               num_cf, num_uf, num_cs, num_us,
                                               susp_num)

    # Compare by value. The previous `is 'fgsm'` tested object identity, which
    # only works when both strings happen to be interned and triggers a
    # SyntaxWarning on Python >= 3.8.
    correct_inputs = X[correct_classifications]
    if adversarial_algo == 'fgsm':
        advs = generate_adversarial(correct_inputs, adversarial_algo, model, multi=True, eps=0.05)
    else:
        advs = generate_adversarial(correct_inputs, adversarial_algo, model, multi=True)

    new_layer_outs = get_layer_outs(model, advs)

    avg_changes = {}
    for layer_index in trainable_layers:
        # Mean (over samples, axis 0) of adversarial-minus-original activations.
        layer_change = np.mean(
            new_layer_outs[layer_index][0]
            - layer_outs[layer_index][0][correct_classifications],
            axis=0)

        # NOTE(review): indexing the first axis with a range taken from the last
        # axis assumes layer_change is 1-D (one value per neuron) - confirm
        # before using this with layers whose outputs have more dimensions.
        for neuron_index in range(layer_change.shape[-1]):
            avg_changes[(layer_index, neuron_index)] = layer_change[neuron_index]

    suspicious_neurons = [(trainable_layers[layer], neuron)
                          for (layer, neuron) in suspicious_neuron_idx]

    return [suspicious_neurons, correct_classifications, misclassifications,
            layer_outs, new_layer_outs, avg_changes]

In [4]:
# Load the MNIST train/test arrays via the project helper.
X_train, Y_train, X_test, Y_test = load_MNIST()

# Hold out one sixth of the training data as a validation set; the fixed
# random_state (seed) makes the split repeatable across runs.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train,
                                                  test_size=1/6.0,
                                                  random_state=seed)

In [14]:
def plot_exp(model_name, adv_algo, X, Y, n, name_suffix, classes=(0,)):
    """Scatter-plot per-neuron activation changes and save the figure.

    For every class in ``classes`` this runs ``exp`` and draws one subplot in a
    3x4 grid: each neuron's average activation change, coloured red when the
    neuron is among the ``n`` suspicious ones returned by ``exp`` and yellow
    otherwise. Suspicious neurons are additionally annotated with their id.
    The figure is written to
    ``adv_exp_figures/<model_name>__<adv_algo>__<n>__<name_suffix>`` as both
    ``.pdf`` and ``.png``.

    Args:
        model_name: Model file name forwarded to ``exp``; also part of the output name.
        adv_algo: Adversarial algorithm name forwarded to ``exp``.
        X: Input samples forwarded to ``exp``.
        Y: Labels forwarded to ``exp``.
        n: Number of suspicious neurons to request from ``exp``.
        name_suffix: Trailing tag of the output file name (e.g. the data split).
        classes: Iterable of class indices to plot. Defaults to ``(0,)``, which
            matches the previous hard-coded behaviour. Each class ``cl`` is
            drawn in subplot ``cl + 1`` of the 3x4 grid, so indices must lie in
            ``0..11``.

    Returns:
        None. Saving is best-effort: a failure is reported through
        ``warnings.warn`` instead of being raised.
    """
    import os
    import warnings

    plt.clf()

    for cl in classes:
        ids, _, _, _, _, changes = exp(X, Y, model_name, cl, n, adv_algo)

        # ids are already used as dict keys below, so they are hashable; a set
        # gives constant-time membership tests when colouring the points.
        suspicious = set(ids)
        neuron_keys = list(changes.keys())
        labels = [str(key) for key in neuron_keys]
        values = [changes[key] for key in neuron_keys]
        colors = ['r' if key in suspicious else 'y' for key in neuron_keys]

        plt.subplot(3, 4, cl + 1)
        plt.title('Class ' + str(cl))
        plt.scatter(labels, values, c=colors)

        for neuron_id in ids:
            plt.annotate(str(neuron_id), (str(neuron_id), changes[neuron_id]))

    plot_name = 'adv_exp_figures/' + model_name + '__' + adv_algo + '__' + str(n) + '__' + name_suffix
    out_dir = path.dirname(plot_name)
    try:
        # A missing output directory would otherwise make every save fail.
        if out_dir and not path.isdir(out_dir):
            os.makedirs(out_dir)
        for extension in ('.pdf', '.png'):
            plt.savefig(plot_name + extension)
    except Exception as err:
        # Keep the run alive, but make the failure visible instead of
        # silently discarding it.
        warnings.warn('Could not save figure %r: %s' % (plot_name, err))

In [None]:
# 'fgsm' experiment: plot activation changes with the 10 most suspicious neurons
# highlighted, once per data split; name_suffix tags the saved figure files.
plot_exp('mnist_test_model_5_30_relu', 'fgsm', X_train, Y_train, n=10, name_suffix='train')
plot_exp('mnist_test_model_5_30_relu', 'fgsm', X_val, Y_val, n=10, name_suffix='validate')
plot_exp('mnist_test_model_5_30_relu', 'fgsm', X_test, Y_test, n=10, name_suffix='test')

In [None]:
# 'jsma' experiment: same model and settings as above, once per data split;
# name_suffix tags the saved figure files.
plot_exp('mnist_test_model_5_30_relu', 'jsma', X_train, Y_train, n=10, name_suffix='train')
plot_exp('mnist_test_model_5_30_relu', 'jsma', X_val, Y_val, n=10, name_suffix='validate')
plot_exp('mnist_test_model_5_30_relu', 'jsma', X_test, Y_test, n=10, name_suffix='test')

In [None]:
# 'cw' experiment: same model and settings as above, once per data split;
# name_suffix tags the saved figure files.
plot_exp('mnist_test_model_5_30_relu', 'cw', X_train, Y_train, n=10, name_suffix='train')
plot_exp('mnist_test_model_5_30_relu', 'cw', X_val, Y_val, n=10, name_suffix='validate')
plot_exp('mnist_test_model_5_30_relu', 'cw', X_test, Y_test, n=10, name_suffix='test')