In [None]:
%matplotlib inline
import network.network as Network
import network.mnist_loader as mnist_loader
import pickle
import matplotlib.pyplot as plt
import numpy as np

### Load Input Data

In [None]:
# Load the three data splits through the project's loader; the cells visible
# in this notebook only use test_data.
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

In [None]:
# Split the (sample, label) pairs into two parallel tuples so that samples and
# labels can be looked up by the same position index.
test_samples, test_labels = zip(*test_data)

### Load Trained Network

In [None]:
# NOTE: unpickling can execute arbitrary code -- only load a network file you trust.
# encoding='latin1' is what lets Python 3 read NumPy arrays pickled by Python 2
# (presumably how this file was produced -- confirm).
with open('network/trained_network.pkl', 'rb') as f:
    net = pickle.load(f, encoding='latin1')

In [None]:
def generate_rubbish_sample(network, desired_label, steps, step_size):
    """
    Build an image from pure noise that `network` is pushed towards classifying
    as `desired_label`, using plain gradient descent on the input.

    network : network object
        model to attack; must provide input_derivative(x, goal)
    desired_label
        goal handed unchanged to network.input_derivative
        NOTE(review): sneaky_adversarial passes a one-hot (10, 1) vector as the
        goal -- confirm whether a bare integer label is also accepted.
    steps : integer
        number of gradient descent steps
    step_size : float
        step size for gradient descent

    Returns the (784, 1) numpy array reached after the last step.
    """
    # generate random image
    x = np.random.normal(.5, .3, (784, 1))

    # modify image to desired adversarial sample
    for _ in range(steps):
        # Use the network that was passed in rather than the notebook-level `net`.
        derivative = network.input_derivative(x, desired_label)
        x -= step_size * derivative

    return x
    

In [None]:
def sneaky_adversarial(net, n, x_target, steps, eta, lam=.05):
    """
    Gradient-descend a random image towards being classified as `n` while a
    penalty term keeps it close to `x_target`.

    net : network object
        network whose input_derivative(x, goal) supplies the gradient
    n : integer
        goal label; turned into a one-hot (10, 1) vector internally
    x_target : numpy vector
        image the adversarial example should resemble
    steps : integer
        how many gradient descent updates to run
    eta : float
        gradient descent step size
    lam : float
        weight of the (x - x_target) penalty. Default is .05

    Returns the (784, 1) image after the final update.
    """
    # One-hot encoding of the goal label
    goal = np.zeros((10, 1))
    goal[n] = 1

    # Random starting point for the descent
    x = np.random.normal(.5, .3, (784, 1))

    for _ in range(steps):
        gradient = net.input_derivative(x, goal)
        # Extra term pulling the image towards the target picture
        penalty = lam * (x - x_target)
        x -= eta * (gradient + penalty)

    return x

# Wrapper function
def sneaky_generate(net, n, m):
    """
    Build an adversarial example that resembles a test image labelled `m` but
    is optimised towards the goal label `n`.

    n: int 0-9, the target number to match
    m: label of example image to use (from the test set)

    Reads the notebook-level `test_samples` / `test_labels`.
    Returns the image produced by sneaky_adversarial.
    Raises ValueError if no test sample carries the label `m`.
    """
    # Pick uniformly among every test image labelled m. This avoids scanning
    # forward from a hard-coded start index, which could walk past the end of
    # the test set.
    candidates = np.flatnonzero(np.asarray(test_labels) == m)
    if candidates.size == 0:
        raise ValueError('No test sample with label ' + str(m))
    idx = int(np.random.choice(candidates))

    # Hardcode the parameters for the wrapper function (500 steps, eta = 1)
    return sneaky_adversarial(net, n, test_samples[idx], 500, 1)

In [None]:
def recognize(sample):
    """Return the index of the strongest network output for `sample`, or None
    when the strongest output is below 0.5."""
    activations = net.feedforward(sample)
    # A peak under 0.5 is treated as "no answer" rather than as a prediction.
    return None if max(activations) < 0.5 else np.argmax(activations)

In [None]:
def predict(sample):
    """Print the network's rounded output and predicted label for `sample`,
    then draw the sample as a 28x28 greyscale image."""
    # Run the forward pass once and reuse it for both printed lines.
    output = net.feedforward(sample)

    # Print the prediction of the network
    print('Network output: \n' + str(np.round(output, 2)) + '\n')
    print('Network prediction: ' + str(np.argmax(output)) + '\n')
    print('Actual image: ')

    # Draw the image
    plt.imshow(sample.reshape((28,28)), cmap='Greys')

In [None]:
# Attack every test sample the network classifies correctly and count how often
# the adversarial image is recognised as the original sample's label.
advers_correct = 0
misclassified = 0

for sample, sample_label in zip(test_samples, test_labels):
    # Check the clean sample first: generating an adversarial image costs 500
    # gradient steps, so skip it for samples that are not counted anyway.
    correct = recognize(sample)
    if correct != sample_label:
        misclassified += 1
        continue

    # The adversarial image is modelled on a test image of the digit one below
    # the true label (label 0 is paired with 5), while the goal label stays
    # the true label.
    advers_label = sample_label - 1
    if advers_label < 0:
        advers_label = 5
    # Alternative attack: generate_rubbish_sample(net, sample_label, 1000, 1)
    adversarial_sample = sneaky_generate(net, sample_label, advers_label)

    adversarial = recognize(adversarial_sample)
    if correct == adversarial:
        advers_correct += 1

print('Attacks successful:', advers_correct)
print('Misclassified:', misclassified)

### Select Detection Modules

In [None]:
def binary_threshold(sample):
    """Binarize a B&W image: entries strictly above 0.5 become 1.0, all others 0.0.

    Returns a new float array; the input is left untouched.
    """
    above_half = np.greater(sample, 0.5)
    return above_half.astype(np.float64)

### Run Experiment

In [None]:
def detect_adversarial_attacks(detection_method, data):
    """
    Flag samples whose classification changes once the detection transform is
    applied, and count misclassified originals.

    detection_method : callable
        maps a sample to its modified version (e.g. binary_threshold). A
        non-callable value falls back to binary_threshold, which this function
        used unconditionally before.
    data : iterable of (sample, label) pairs

    NOTE(review): the counters are neither returned nor printed in the code
    visible here -- confirm how the results are meant to be reported.
    """
    transform = detection_method if callable(detection_method) else binary_threshold
    attacks = 0
    misclassifications = 0

    for sample, label in data:
        out_original = net.classify(sample)
        out_modified = net.classify(transform(sample))

        # A prediction that flips under the transform is counted as an attack.
        if out_original != out_modified:
            attacks += 1
            if out_original != label:
                print("Attack detected! Misclassification also happened though...")
        if out_original != label:
            # Was `missclassifications`, an unbound name that raised on first use.
            misclassifications += 1