In [1]:
# Fix the random seeds in the very first cell, before the Keras-backed
# project modules are imported further down.
from numpy.random import seed
seed(4)  # seeds NumPy's global RNG (np.random.choice is used later in exp)
# NOTE(review): a top-level `set_random_seed` looks like the TensorFlow 1.x
# spelling -- confirm against the pinned TensorFlow version.
from tensorflow import set_random_seed
set_random_seed(26)

In [2]:
from os import path
from train_mnist_nn import __save_trained_model
from utils import load_MNIST, load_model
from utils import save_perturbed_test_groups, load_perturbed_test_groups
from utils import load_suspicious_neurons, save_suspicious_neurons
from utils import create_experiment_dir, get_trainable_layers
from utils import load_classifications, save_classifications
from utils import save_layer_outs, load_layer_outs, construct_spectrum_matrices
from utils import load_MNIST, load_CIFAR, load_model
from utils import filter_val_set, save_original_inputs
from input_synthesis import synthesize
from test_nn import test_model
from spectrum_analysis import *
import matplotlib.pyplot as plt
import datetime
# Names used directly by the cells below; imported explicitly (and after the
# star import, so it cannot rebind them) instead of relying on whatever
# `from spectrum_analysis import *` happens to export.
from collections import defaultdict
import numpy as np

Using TensorFlow backend.


In [3]:
from keras import models
from keras.models import model_from_json

In [4]:
# --- Notebook-wide configuration -------------------------------------------

# Directories, given relative to the notebook's working directory.
experiment_path = "experiment_results"
model_path = "neural_networks"   # joined with the model name when loading

group_index = 1
__version__ = "v1.0"

# Number of output classes; the driver loop runs one experiment per class.
nb_classes = 10
# How many correctly classified inputs exp() samples for perturbation per
# class; -1 means "use every correctly classified input".
perturbed_count = 10

In [5]:
# Experiment options. Only the keys listed here are set explicitly; every
# other option falls back to the default given next to it below.
args = {
    'model': 'mnist_test_model_0_0',
    'suspicious_num': 10
}

# Missing keys read as None (instead of raising KeyError), which is what the
# "is not None" fallbacks below rely on.
args = defaultdict(lambda: None, args)

# Identity comparison with None is used on purpose: `== None` can be
# overridden by the value's type (e.g. array-likes compare element-wise).
model_name     = args['model']
dataset        = args['dataset'] if args['dataset'] is not None else 'mnist'
step_size      = args['step_size'] if args['step_size'] is not None else 1
distance       = args['distance'] if args['distance'] is not None else 0.1
approach       = args['approach'] if args['approach'] is not None else 'tarantula'
susp_num       = args['suspicious_num'] if args['suspicious_num'] is not None else 1
repeat         = args['repeat'] if args['repeat'] is not None else 1
star           = args['star'] if args['star'] is not None else 3
logfile_name   = args['logfile'] if args['logfile'] is not None else 'result.log'

In [6]:
# Resolve the model's location under `model_path`, then load it.
model_location = path.join(model_path, model_name)
model = load_model(model_location)

('Model structure loaded from ', 'neural_networks/mnist_test_model_0_0')


In [7]:
# Load MNIST with channel_first=False and unpack the four returned splits.
mnist_splits = load_MNIST(channel_first=False)
X_train, Y_train, X_test, Y_test = mnist_splits

In [8]:
def exp(selected_class, X, Y):
    """Run the suspiciousness-guided input synthesis experiment for one class.

    Steps: filter (X, Y) down to `selected_class`, test the model on that
    subset, rank suspicious neurons, synthesize perturbed inputs from a sample
    of the correctly classified inputs, save them as PNG files and evaluate
    the model on them.

    Reads these notebook-level names: model, model_name, approach, susp_num,
    step_size, distance, perturbed_count, nb_classes, logfile_name.

    Side effects:
        * appends result lines to the file named by `logfile_name`;
        * writes one PNG per perturbed input under 'susp_adv_inputs/'
          (the directory is not created here).

    Args:
        selected_class: class label the experiment is restricted to.
        X: inputs, passed to filter_val_set together with Y.
        Y: labels matching X.

    Returns:
        Tuple (perturbed_xs, perturbed_ys, suspicious_neuron_idx) where the
        first two are NumPy arrays and the third is whatever
        tarantula_analysis returned.

    Raises:
        ValueError: if `approach` is not 'tarantula'.
    """
    # Reject an unsupported approach up front, before the model is run and
    # before the log file is touched (previously: print + exit() mid-way,
    # which left the log file open and tried to terminate the session).
    if approach != 'tarantula':
        raise ValueError('Wrong approach: ' + str(approach))

    # The context manager guarantees the log is closed even if any step
    # below raises.
    with open(logfile_name, 'a') as logfile:

        #Fault localization is done per class.
        X, Y = filter_val_set(selected_class, X, Y)

        ####################
        # 2) Test the model: get the indexes of correct and incorrect
        # classifications plus the layer outputs recorded for the inputs.
        correct_classifications, misclassifications, layer_outs, predictions = \
            test_model(model, X, Y)

        ####################
        # 3) From the correct classifications & misclassifications identify
        # the suspicious neurons per layer.
        trainable_layers = get_trainable_layers(model)
        scores, num_cf, num_uf, num_cs, num_us = construct_spectrum_matrices(
            model,
            trainable_layers,
            correct_classifications,
            misclassifications,
            layer_outs)

        suspicious_neuron_idx = tarantula_analysis(trainable_layers, scores,
                                                   num_cf, num_uf, num_cs, num_us,
                                                   susp_num)

        logfile.write('Suspicous neurons: ' + str(suspicious_neuron_idx) + '\n')

        ####################
        # 4) Run the Suspiciousness-Guided Input Synthesis Algorithm: produce
        # new inputs, based on correctly classified ones, that exercise the
        # suspicious neurons found in step 3.

        # Pick the seed inputs: all correct classifications when
        # perturbed_count is -1, otherwise a random sample without replacement.
        correct_pool = list(correct_classifications)
        if perturbed_count == -1:
            selected = correct_pool
        else:
            # Clamp the sample size: choice(..., replace=False) raises when
            # asked for more items than the pool holds.
            sample_size = min(perturbed_count, len(correct_pool))
            selected = np.random.choice(correct_pool, size=sample_size,
                                        replace=False)

        # Materialise the pairs as a list: on Python 3 a bare zip() is a
        # one-shot iterator, which would be empty on a second traversal.
        zipped_data = list(zip(list(np.array(X)[selected]),
                               list(np.array(Y)[selected])))

        syn_start = datetime.datetime.now()
        x_perturbed, y_perturbed, x_original = synthesize(model, zipped_data,
                                                          suspicious_neuron_idx,
                                                          correct_classifications,
                                                          step_size,
                                                          distance)
        syn_end = datetime.datetime.now()

        # Reshape into the format the model expects: one row per perturbed
        # input, inputs shaped like X[0], labels with nb_classes columns.
        perturbed_xs = np.asarray(x_perturbed)
        perturbed_xs = perturbed_xs.reshape(perturbed_xs.shape[0], *X[0].shape)

        perturbed_ys = np.asarray(y_perturbed)
        perturbed_ys = perturbed_ys.reshape(perturbed_ys.shape[0], nb_classes)

        # NOTE(review): indexing `selected[i]` assumes synthesize returns one
        # perturbed input per selected input, in the same order -- confirm.
        # The 28x28 reshape is specific to MNIST-sized images.
        for i in range(len(perturbed_xs)):
            name = 'susp_adv_inputs/'+model_name+'_'+str(perturbed_ys[i])+'_C'+str(selected_class)+'_susp_guided_'+str(selected[i])+'.png'
            plt.imsave(name, perturbed_xs[i].reshape(28,28), cmap='gray')

        ####################
        # 5) Test if the mutated inputs are adversarial
        score = model.evaluate(perturbed_xs, perturbed_ys, verbose=0)
        logfile.write('Model: ' + model_name + ', Class: ' + str(selected_class) +
                      ', Approach: ' + approach + ', Distance: ' +
                      str(distance) + ', Score: ' + str(score) + '\n')

        logfile.write('Input Synthesis Time: ' + str(syn_end-syn_start) + '\n')

    return perturbed_xs, perturbed_ys, suspicious_neuron_idx

In [9]:
# One result slot per class. Real lists are used because a range object does
# not support item assignment on Python 3 (it only worked on Python 2, where
# range() returned a list).
perturbed_xs_by_class = [None] * nb_classes
perturbed_ys_by_class = [None] * nb_classes
sus_ids_by_class = [None] * nb_classes

# Run the experiment once per class and keep each class's perturbed inputs,
# their labels and the suspicious neuron indices.
for selected_class in range(nb_classes):
    class_xs, class_ys, class_sus_ids = exp(selected_class, X_train, Y_train)
    perturbed_xs_by_class[selected_class] = class_xs
    perturbed_ys_by_class[selected_class] = class_ys
    sus_ids_by_class[selected_class] = class_sus_ids

Validation set filtered for desired class: 0
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 28, 28, 4)         104       
_________________________________________________________________
block1_pool1 (MaxPooling2D)  (None, 14, 14, 4)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 14, 14, 12)        1212      
_________________________________________________________________
block2_pool1 (MaxPooling2D)  (None, 7, 7, 12)          0         
_________________________________________________________________
flatten (Flatten)            (None, 588)               0         
_________________________________________________________________
softmax (Dense)              (N

  'recall', 'true', average, warn_for)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5923
           2       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       1.00      1.00      1.00      5923
   macro avg       0.25      0.25      0.25      5923
weighted avg       1.00      1.00      1.00      5923

[[5920    1    1    1]
 [   0    0    0    0]
 [   0    0    0    0]
 [   0    0    0    0]]
Caught this error: Exception("Accuracy results don't match to score",)
Testing done!

(None, 28, 28, 4)
(None, 14, 14, 12)
('Input index:', 5)
('Input index:', 10)
Validation set filtered for desired class: 1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________

[loss, accuracy] -> [0.01654736935211271, 0.9958918177889701]
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           4       1.00      1.00      1.00      5842
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       1.00      1.00      1.00      5842
   macro avg       0.14      0.14      0.14      5842
weighted avg       1.00      1.00      1.00      5842

[[   0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0]
 [   5    2 5818    1    1    1   14]
 [   0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0]]
Caught this error: Exception("Accuracy results don't match to score",)
Tes

('Input index:', 5)
('Input index:', 10)
Validation set filtered for desired class: 8
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 28, 28, 4)         104       
_________________________________________________________________
block1_pool1 (MaxPooling2D)  (None, 14, 14, 4)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 14, 14, 12)        1212      
_________________________________________________________________
block2_pool1 (MaxPooling2D)  (None, 7, 7, 12)          0         
_________________________________________________________________
flatten (Flatten)            (None, 588)               0         
________________________________________________________

In [11]:
# Persist the per-class perturbed inputs and labels; np.save appends the
# '.npy' extension to each name.
for stem, per_class in (('perturbed_xs_by_class', perturbed_xs_by_class),
                        ('perturbed_ys_by_class', perturbed_ys_by_class)):
    np.save(stem, per_class)