In [1]:
from test_nn import test_model
from lp import run_lp
from os import path
from spectrum_analysis import *
from utils import save_perturbed_test_groups, load_perturbed_test_groups
from utils import load_suspicious_neurons, save_suspicious_neurons
from utils import create_experiment_dir, get_trainable_layers
from utils import load_classifications, save_classifications
from utils import save_layer_outs, load_layer_outs, construct_spectrum_matrices
from utils import load_MNIST, load_CIFAR, load_model
from utils import filter_val_set, save_original_inputs
from input_synthesis import synthesize
from sklearn.model_selection import train_test_split
import datetime
import argparse
import random
from collections import defaultdict
import matplotlib.pyplot as plt
from keras import models

Using TensorFlow backend.


In [2]:
# Directories used for cached experiment artefacts and for the stored networks.
model_path = "neural_networks"
experiment_path = "experiment_results"

# Index handed to every save_*/load_* helper call in this notebook.
group_index = 1

__version__ = "v1.0"

# Number of output classes; the driver loop runs one experiment per class.
nb_classes = 10

# How many correctly classified inputs to perturb; -1 means "use all of them".
perturbed_count = -1

In [3]:
# Options for this run. Any key that is not set here reads back as None,
# which lets the next cell fall back to its defaults.
args = defaultdict(
    lambda: None,
    model='mnist_test_model_5_30_relu',
    suspicious_num=10
)

In [4]:
# Resolve every run option, falling back to a default when it was not supplied
# (unset keys of `args` read as None). None is tested with `is not None`
# (identity) rather than `== None`, which can be fooled by objects that
# override equality.
model_name     = args['model']
dataset        = args['dataset'] if args['dataset'] is not None else 'mnist'
step_size      = args['step_size'] if args['step_size'] is not None else 1
distance       = args['distance'] if args['distance'] is not None else 0.1
approach       = args['approach'] if args['approach'] is not None else 'tarantula'
susp_num       = args['suspicious_num'] if args['suspicious_num'] is not None else 1
repeat         = args['repeat'] if args['repeat'] is not None else 1
# With no explicit seed a random one in [0, 10] is drawn, so the train/validation
# split differs between runs unless 'seed' is set in `args`.
seed           = args['seed'] if args['seed'] is not None else random.randint(0,10)
star           = args['star'] if args['star'] is not None else 3
logfile_name   = args['logfile'] if args['logfile'] is not None else 'result.log'

In [5]:
####################
# 0) Load the data set: MNIST when requested, CIFAR for any other value.
if dataset != 'mnist':
    X_train, Y_train, X_test, Y_test = load_CIFAR()
else:
    X_train, Y_train, X_test, Y_test = load_MNIST()


# Hold out one sixth of the training data as a validation split; the split is
# controlled by `seed`.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train,
    random_state=seed,
    test_size=1/6.0
)

In [6]:
####################
# 1) Load the pretrained network.
try:
    model = load_model(path.join(model_path, model_name))
except Exception:
    # No log file handle exists at module scope (it is only opened inside
    # exp()), so open it here for the duration of the write.
    with open(logfile_name, 'a') as logfile:
        logfile.write("Model not found! Provide a pre-trained model model as input.\n")
    # Re-raise instead of calling exit(): the original error stays visible in
    # the notebook and the kernel is not shut down.
    raise

('Model structure loaded from ', 'neural_networks/mnist_test_model_5_30_relu')


In [7]:
def _suspicious_neurons_file(selected_class, approach_name):
    """Build the cache-file prefix for the suspicious neurons of one class and approach.

    The prefix combines the module-level ``experiment_path``, ``model_name`` and
    ``susp_num`` with the given class and approach name.
    """
    return (experiment_path + '/' + model_name + '_C' + str(selected_class) +
            '_' + approach_name + '_SN' + str(susp_num))


def _pick_random_neurons(selected_class):
    """Randomly pick ``susp_num`` neurons that no spectrum-based approach flagged.

    Random fault localization has to be run after running Tarantula, Ochiai and
    DStar with the same parameters: their saved results are loaded here and an
    error from ``load_suspicious_neurons`` propagates if one is missing.
    """
    flagged_by_approach = [
        load_suspicious_neurons(_suspicious_neurons_file(selected_class, name),
                                group_index)
        for name in ('tarantula', 'ochiai', 'dstar')
    ]

    # Every neuron already flagged by any approach is excluded from the draw.
    forbiddens = [list(neuron) for flagged in flagged_by_approach
                  for neuron in flagged]

    # Candidate layers: the distinct layers of each approach, one approach after
    # the other. The tarantula layers are de-duplicated like the other two so
    # that no approach is over-weighted in the random layer choice.
    available_layers = []
    for flagged in flagged_by_approach:
        available_layers += list(set(elem[0] for elem in flagged))

    chosen = []
    while len(chosen) < susp_num:
        l_idx = random.choice(available_layers)
        n_idx = random.choice(range(model.layers[l_idx].output_shape[1]))
        candidate = [l_idx, n_idx]

        if candidate not in forbiddens and candidate not in chosen:
            chosen.append(candidate)

    return chosen


def exp(selected_class):
    """Run fault localization and suspiciousness-guided input synthesis for one class.

    Uses the module-level configuration (``model``, ``approach``, ``susp_num``,
    ``step_size``, ``distance``, ``star``, ``perturbed_count``, ...) and appends
    progress and results to the file named by ``logfile_name``.

    Steps:
      2) classify the test inputs of ``selected_class`` (or load cached results),
      3) identify the suspicious neurons with the configured ``approach``,
      4) synthesize perturbed inputs from the correctly classified ones,
      5) evaluate the model on the perturbed inputs and log the score.

    Parameters
    ----------
    selected_class : int
        Class label whose test inputs are analysed.

    Returns
    -------
    tuple
        ``(perturbed_xs, perturbed_ys, suspicious_neuron_idx)``: the perturbed
        inputs and their labels as numpy arrays, and the list of suspicious
        neurons that guided the synthesis.

    Raises
    ------
    ValueError
        If ``approach`` is not one of 'tarantula', 'ochiai', 'dstar', 'random'.
    """
    spectrum_approaches = ('tarantula', 'ochiai', 'dstar')
    if approach not in spectrum_approaches and approach != 'random':
        # Fail early and explicitly; otherwise the neuron list would never be
        # assigned and the first use of it would raise a NameError.
        raise ValueError('Unknown approach: ' + str(approach))

    # `with` guarantees the log file is closed even if a step below raises.
    with open(logfile_name, 'a') as logfile:

        experiment_name = create_experiment_dir(experiment_path, model_name,
                                                selected_class, step_size,
                                                approach, susp_num, repeat)

        # Fault localization is done per class. The inputs are taken from the
        # test set (X_test / Y_test).
        X_cls, Y_cls = filter_val_set(selected_class, X_test, Y_test)

        ####################
        # 2) Test the model and receive the indexes of correct and incorrect
        # classifications. Also provide the output of each neuron in each layer
        # for test input x. Cached results are reused when they can be loaded.
        filename = experiment_path + '/' + model_name + '_' + str(selected_class)
        try:
            correct_classifications, misclassifications = load_classifications(filename, group_index)
            layer_outs = load_layer_outs(filename, group_index)
        except Exception:
            correct_classifications, misclassifications, layer_outs, predictions =\
                    test_model(model, X_cls, Y_cls)
            save_classifications(correct_classifications, misclassifications,
                                 filename, group_index)
            save_layer_outs(layer_outs, filename, group_index)

        ####################
        # 3) Receive the correct classifications & misclassifications and
        # identify the suspicious neurons per layer.
        trainable_layers = get_trainable_layers(model)
        scores, num_cf, num_uf, num_cs, num_us = construct_spectrum_matrices(model,
                                                                            trainable_layers,
                                                                            correct_classifications,
                                                                            misclassifications,
                                                                            layer_outs)

        if approach in spectrum_approaches:
            filename = _suspicious_neurons_file(selected_class, approach)
            try:
                suspicious_neuron_idx = load_suspicious_neurons(filename, group_index)
            except Exception:
                # Nothing cached for this configuration: run the analysis and
                # save its result for later runs.
                if approach == 'tarantula':
                    suspicious_neuron_idx = tarantula_analysis(trainable_layers, scores,
                                                               num_cf, num_uf, num_cs, num_us,
                                                               susp_num)
                elif approach == 'ochiai':
                    suspicious_neuron_idx = ochiai_analysis(trainable_layers, scores,
                                                            num_cf, num_uf, num_cs, num_us,
                                                            susp_num)
                else:
                    suspicious_neuron_idx = dstar_analysis(trainable_layers, scores,
                                                           num_cf, num_uf, num_cs, num_us,
                                                           susp_num, star)

                save_suspicious_neurons(suspicious_neuron_idx, filename, group_index)
        else:
            suspicious_neuron_idx = _pick_random_neurons(selected_class)

        logfile.write('Suspicous neurons: ' + str(suspicious_neuron_idx) + '\n')

        ####################
        # 4) Run Suspiciousness-Guided Input Synthesis Algorithm.
        # Receive the set of suspicious neurons from Step 3 and produce new
        # inputs based on the correct classifications (from the testing set)
        # that exercise the suspicious neurons.

        # Use every correctly classified input, or a random sample of
        # `perturbed_count` of them.
        if perturbed_count == -1:
            selected = list(correct_classifications)
        else:
            selected = np.random.choice(list(correct_classifications), perturbed_count)

        # Materialized as a list so synthesize() receives the same kind of
        # object under Python 3 (where zip is a one-shot iterator) as under
        # Python 2.
        zipped_data = list(zip(list(np.array(X_cls)[selected]),
                               list(np.array(Y_cls)[selected])))

        syn_start = datetime.datetime.now()
        x_perturbed, y_perturbed, x_original = synthesize(model, zipped_data,
                                               suspicious_neuron_idx,
                                               correct_classifications,
                                               step_size,
                                               distance)
        syn_end = datetime.datetime.now()

        # Reshape them into the expected format: one row per perturbed input,
        # inputs shaped like a single sample, labels with `nb_classes` columns.
        perturbed_xs = np.asarray(x_perturbed)
        perturbed_xs = perturbed_xs.reshape(perturbed_xs.shape[0], *X_cls[0].shape)

        perturbed_ys = np.asarray(y_perturbed)
        perturbed_ys = perturbed_ys.reshape(perturbed_ys.shape[0], nb_classes)

        # Save perturbed inputs. This stays best-effort (a failure does not
        # abort the experiment) but is recorded in the log rather than ignored.
        filename = path.join(experiment_path, experiment_name)
        try:
            save_perturbed_test_groups(perturbed_xs, perturbed_ys, filename, group_index)
            save_original_inputs(x_original, filename, group_index)
        except Exception as err:
            logfile.write('Could not save perturbed inputs: ' + str(err) + '\n')

        ####################
        # 5) Test if the mutated inputs are adversarial
        score = model.evaluate(perturbed_xs, perturbed_ys, verbose=0)
        logfile.write('Model: ' + model_name + ', Class: ' + str(selected_class) +
                      ', Approach: ' + approach + ', Distance: ' +
                      str(distance) + ', Score: ' + str(score) + '\n')

        logfile.write('Input Synthesis Time: ' + str(syn_end-syn_start) + '\n')

    return perturbed_xs, perturbed_ys, suspicious_neuron_idx

In [8]:
# NOTE: the triple-quoted block below is a bare string literal, not a comment.
# Nothing inside it is executed; as the last expression of the cell its repr is
# echoed as the cell output.
'''
Currently not available
####################
# 6) retrain the model
# train_model_fault_localisation(model, x_perturbed, y_perturbed, len(x_perturbed))
model.fit(x_perturbed, y_perturbed, batch_size=32, epochs=10, verbose=1)

####################
# 7) retest the model
test_model(model, X_test, Y_test)
'''



'\nCurrently not available\n####################\n# 6) retrain the model\n# train_model_fault_localisation(model, x_perturbed, y_perturbed, len(x_perturbed))\nmodel.fit(x_perturbed, y_perturbed, batch_size=32, epochs=10, verbose=1)\n\n####################\n# 7) retest the model\ntest_model(model, X_test, Y_test)\n'

In [9]:
# Evaluate the loaded model on the full test set before any experiment runs.
# The result is kept in `initial` so it can be printed later for comparison.
initial = model.evaluate(X_test, Y_test)



In [10]:
# One result slot per class. Real lists are used because item assignment on
# range(...) only works on Python 2 (where range returns a list); on Python 3 it
# raises TypeError.
perturbed_xs_by_class = [None] * nb_classes
perturbed_ys_by_class = [None] * nb_classes
sus_ids_by_class = [None] * nb_classes

# Run the fault-localization / input-synthesis experiment for every class.
for selected_class in range(nb_classes):
    (perturbed_xs_by_class[selected_class],
     perturbed_ys_by_class[selected_class],
     sus_ids_by_class[selected_class]) = exp(selected_class)

# Fit the existing `model` (in place) for one epoch on each class's perturbed
# inputs, only after all experiments have finished.
for selected_class in range(nb_classes):
    model.fit(perturbed_xs_by_class[selected_class], perturbed_ys_by_class[selected_class], epochs=1)

Validation set filtered for desired class: 0
experiment_results/mnist_test_model_5_30_relu_0_classifications.h5
('Classifications loaded from ', 'experiment_results/mnist_test_model_5_30_relu_0_classifications.h5')
('Layer outs loaded from ', 'experiment_results/mnist_test_model_5_30_relu_0_layer_outs.h5')




('Suspicious neurons  loaded from ', 'experiment_results/mnist_test_model_5_30_relu_C0_tarantula_SN10_suspicious_neurons.h5')
('Input index:', 10)
('Input index:', 20)
('Input index:', 30)


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/site-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/site-packages/IPython/core/ultratb.py", line 358, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/inspect.py", line 1051, in getinnerframes
    framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/inspect.py", line 1011, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/anaconda3/envs/tensorflow_env_p27/lib/python2.7/inspect.py", line 453, in getsour

IndexError: string index out of range

In [None]:
# Augmented training set: the perturbed inputs of all classes followed by the
# original training inputs (`a`), with the matching labels in `b`.
a = np.concatenate([np.concatenate(perturbed_xs_by_class), X_train])
b = np.concatenate([np.concatenate(perturbed_ys_by_class), Y_train])

In [None]:
# Show the evaluation result recorded for the original model before retraining.
print(initial)

In [None]:
# Rebuild a model from the JSON description of `model`, compile it, and train it
# on the augmented set (`a`, `b`).
new_model = models.model_from_json(model.to_json())
new_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
new_model.fit(a, b, batch_size=32, epochs=10)

In [None]:
# Evaluate the retrained model on the same test set used for `initial`, so the
# two results can be compared directly.
retrained_metrics = new_model.evaluate(X_test, Y_test)
print(retrained_metrics)