In [0]:
# If running on Google Colab, only cleverhans needs installation. This can be done via:
!pip install cleverhans

# If running locally, we've listed (TODO) our dependencies in requirements.txt, so the following
# should get everything up and running:
# !pip install -r requirements.txt

import numpy
import keras
import pandas
import requests
import io
import zipfile
import os
import re
import cleverhans
import tensorflow

from cleverhans.attacks import FastGradientMethod
from cleverhans.attacks import CarliniWagnerL2
from cleverhans.attacks import SaliencyMapMethod
from cleverhans.attacks_tf import jacobian_augmentation
from cleverhans.attacks_tf import jacobian_graph
from cleverhans.loss import CrossEntropy
from cleverhans.train import train
from cleverhans.utils_keras import KerasModelWrapper
from cleverhans.utils_tf import model_eval

from keras.models import Sequential
from keras.layers import Dense

numpy.random.seed(0xC0FFEE)
tensorflow.set_random_seed(0xC0FFEE)
rng = numpy.random.RandomState(0xC0FFEE)

# Dataset

## Loading data

Run the below code to download a copy of the dataset (if you don't already have it):

In [0]:
response = requests.get("http://www.schonlau.net/masquerade/masquerade-data.zip")

dataset_file = io.BytesIO(response.content)

zipped_dataset = zipfile.ZipFile(dataset_file)
zipped_dataset.extractall('data/masquerade-data')

In [0]:
# http://www.schonlau.net/intrusion.html
# download Masquerade Data (zip File)

import pandas as pd
directory = './data/masquerade-data'

In [0]:
def sorted_nicely( l ):
    """ Sorts the given iterable in the way that is expected.
 
    Required arguments:
    l -- The iterable to be sorted.
 
    """
    convert = lambda text: int(text) if text.isdigit() else text
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    return sorted(l, key = alphanum_key)

In [0]:
users = range(1,51)
df = pd.DataFrame()

for filename in sorted_nicely(os.listdir(directory)):
    user = pd.read_csv(os.path.join(directory, filename), header=None)
    df = pd.concat([df, user], axis = 1)
    
df.columns = sorted_nicely(os.listdir(directory))

We've loaded in the dataset, but need to do a little co-ercion to get it how we need. Firstly, make sure that all the values in this dataframe are categorical variables which share the same data type:

In [0]:
commands = numpy.unique(df)
command_dtype = pandas.api.types.CategoricalDtype(commands)

for column in df:
    df[column] = df[column].astype(command_dtype)

In [0]:
labelled, unlabelled = df.head(5000), df.tail(len(df) - 5000)  # ignore unlabeled

Plan is convert to the following format:

  user, command1?, command2?, ..., 
  
 so the first column is a label, and the second a one-hot encoding of the command.
 
 When we do the rolling window aggregation, we just sum the columns (per-user).
 
 Use [rolling window sampling](https://pcp.io/books/PCP_PG/html/LE42586-PARENT.html).

In [0]:
def rolling_window_command_counts(commands, window_size):
    
    # Save a copy the name of the series to add again to our output. This will preserve the mapping of
    # user identifier to (it's column header in the dataframe it came from), which in
    # this case is the user identifier. 
    user = commands.name

    # Convert the single column "which command was run?" to a column for each
    # command, which says "was command <x> run?"
    commands = pandas.get_dummies(commands)

    # Take a rolling sample of the last 100 commands, then sum each "was command <x> run?"
    # columns to give a bunch "command <x> was run <y> times in this window".
    command_counts = commands.rolling(window=window_size).aggregate(numpy.sum)

    # Remove the first 100 rows because they contain data from blocks of size < 100.
    command_counts = command_counts[window_size-1:]
    
    # Preserve the user identifier (see top of function) as a new column:
    
    # First, a nasty hack: https://github.com/pandas-dev/pandas/issues/19136
    command_counts = command_counts.rename(columns=str)  
    
    # Then, add in the user (with an adhoc parser to turn the label into a number)
    command_counts['user'] = int(user.replace('User', ''))

    return command_counts

# Example
rolling_window_command_counts(labelled['User1'], 100)

In [0]:
labelled_dataset = pandas.concat([
        rolling_window_command_counts(commands, 100)
        for user, commands in labelled.iteritems()
    ],
    ignore_index=True,  # reset index to go from 0 to 4900
)

labelled_dataset

Unnamed: 0,%backup%,.java_wr,.maker_w,.wrapper,.xinitrc,.xsessio,1.1,1.2,1.3,4Dwm,...,xxx,yacc,ypcat,yppasswd,z,zip,zsh,zubs,zz2,user
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [0]:
labels = labelled_dataset['user'] - 1
dataset = labelled_dataset.drop(columns=['user'])

In [0]:
labels =  keras.utils.to_categorical(labels, num_classes=50)

In [0]:
from sklearn.model_selection import train_test_split

training_data, testing_data, training_labels, testing_labels = train_test_split(
    dataset,
    labels, 
    test_size=0.10,
)

# Building the Oracle

In [0]:
oracle = Sequential()

In [0]:
input_layer = Dense(
    units=856,
    activation='relu',
    input_dim=856,
)

In [0]:
hidden_layer = Dense(
    units=30,
    activation='relu',
)

In [0]:
output_layer = Dense(
    units=50,
    activation='softmax',
)

In [0]:
oracle.add(input_layer)
oracle.add(hidden_layer)
oracle.add(output_layer)

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
oracle.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training Oracle on Dataset

In [0]:
oracle.fit(training_data,  training_labels, epochs=3, batch_size=50)

Instructions for updating:
Use tf.cast instead.
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f202fbeb080>

In [0]:
oracle

<keras.engine.sequential.Sequential at 0x7f202fbe9e10>

# Evaluating the Oracle

In [0]:
loss, accuracy = oracle.evaluate(testing_data, testing_labels)



In [0]:
loss, accuracy

(0.03645177839434664, 0.9854315445608471)

In [0]:
oracle.metrics_names

['loss', 'acc']

# Building a Substitute Model

Attack Model:
  - We only have access to the oracle as a black box, allowing the following interactions:
    - Send input.
    - Receive a prediction.

Need to search the input space to find the decision boundaries, use these inputs to train the substitute model. Note: we can overfit here and it's absolutely fine!

The original "Blackbox... " paper has an accompanying Python library,  [cleverhans](https://github.com/tensorflow/cleverhans/), which we use to implement this attack.

First, mirror the architecture of the oracle:
    

In [0]:
substitute = Sequential()

input_layer = Dense(
    units=856,
    activation='relu',
    input_dim=856,
)
hidden_layer = Dense(
    units=30,
    activation='relu',
)
output_layer = Dense(
    units=50,
    activation='softmax',
)

substitute.add(input_layer)
substitute.add(hidden_layer)
substitute.add(output_layer)

# We need to convert our substitute model into the cleverhans format.
substitute_ch = KerasModelWrapper(substitute)

In [0]:
tensorflow_session = tensorflow.Session()

We start by giving the adversary a small dataset with which to bootstrap it's search. Initially, we give it a random sample of 5% of the original data set. 

We can then steal the rest of the dataset to determine the accuracy of our substitute model. **NOTE** They do this in the tutorial code but, is it legit? Or are we cheating?


In [0]:
adversary_training_set, adversary_test_set = train_test_split(
    labelled_dataset,
    train_size=0.05,
    stratify=labelled_dataset['user'],
)

adversary_training_inputs = adversary_training_set.drop('user', axis='columns')
adversary_training_labels = adversary_training_set['user'] - 1  # keras requires 0 based index

# For some reason cleverhans doesn't detect a GPU when it runs, but our models at the top using
# keras _do_. I think this creates a type mis-match: code running on the GPU uses numpy.float64
# whilst the cleverhans stuff runs on the CPU and extects numpy.float32 (or vica versa).
#   -> This is why this dodgy type conversion exists:
adversary_training_inputs = adversary_training_inputs.values.astype(numpy.float32)
adversary_training_labels = adversary_training_labels.values



Define input placeholders for the tensor flow model (these are then used to generate new points)

In [0]:
number_of_users = 50
number_of_commands = 856

input_placeholder = tensorflow.placeholder(
    tensorflow.float32,
    shape=(None, number_of_commands)
)

output_placeholder = tensorflow.placeholder(
    tensorflow.float32,
    shape=(None, number_of_users)
)

Get the oracles predictions for the "bootstrap" inputs:

In [0]:
bootstrap_oracle_predictions = oracle.predict(adversary_training_inputs)

Train substitute using method from https://arxiv.org/abs/1602.02697

In [0]:
# Define the predictions and loss of the model, symbolically in TensorFlow (i.e. these variables 
# point to the result of calculations that haven't been performed yet)

substitute_predictions = substitute_ch.get_logits(input_placeholder)
substitute_loss = CrossEntropy(substitute_ch, smoothing=0)

Here we define a Jacobian Graph/Model (**TODO** What in the world is this? and why are we using it?).

In [0]:
# Define the Jacobian symbolically using TensorFlow
grads = jacobian_graph(substitute_predictions, input_placeholder, number_of_users)

number_of_dataset_augmentation_batches = 5
dataset_augmentation_batch_size = 512


stepsize = 1  # this is the step-size of the Jacobian augmentation (we are working in ints so use 1).


# Train the substitute and augment dataset alternatively
for batch in range(number_of_dataset_augmentation_batches):
    print("BATCH #" + str(batch))
    
    print("Substitute training epoch:")
    train(
        tensorflow_session, 
        substitute_loss,
        adversary_training_inputs, 
        keras.utils.to_categorical(adversary_training_labels, num_classes=50),
        init_all=False,
        args={
            'nb_epochs': 10,
            'batch_size': 32,
            'learning_rate': 0.001,
        },
        rng=rng,
    )
    

    # If we are not at last substitute training iteration, augment dataset
    in_final_batch = batch == number_of_dataset_augmentation_batches - 1
    if not in_final_batch:
        print("Generating new data points:")
        
        # Use Jacobian augmentation to generate new data points:
        step_coef = 2 * int(int(batch / 3) != 0) - 1 

        augmented_dataset_inputs = jacobian_augmentation(
            tensorflow_session, 
            input_placeholder, 
            adversary_training_inputs, 
            adversary_training_labels,
            grads,
            step_coef * stepsize,
            dataset_augmentation_batch_size,
        )
        new_datapoints = augmented_dataset_inputs[len(adversary_training_inputs):]

        # Send the newly generated data points to the oracle, and use its output as their labels:
        new_labels = oracle.predict(new_datapoints)

        # Note here that we take the argmax because the adversary
        # only has access to the label (not the probabilities) output
        # by the black-box model
        new_labels = numpy.argmax(new_labels, axis=1)

        augmented_dataset_labels = numpy.hstack([adversary_training_labels, new_labels])

        # Replace dataset and labels with augmented dataset and labels
        adversary_training_inputs = augmented_dataset_inputs
        adversary_training_labels = augmented_dataset_labels

BATCH #0
Substitute training epoch:
Instructions for updating:
dim is deprecated, use axis instead
num_devices:  1


[INFO 2019-03-16 21:25:05,646 cleverhans] Epoch 0 took 2.2861549854278564 seconds
[INFO 2019-03-16 21:25:08,173 cleverhans] Epoch 1 took 2.3262522220611572 seconds
[INFO 2019-03-16 21:25:10,750 cleverhans] Epoch 2 took 2.366286039352417 seconds
[INFO 2019-03-16 21:25:13,346 cleverhans] Epoch 3 took 2.3928585052490234 seconds
[INFO 2019-03-16 21:25:15,959 cleverhans] Epoch 4 took 2.409487247467041 seconds
[INFO 2019-03-16 21:25:18,528 cleverhans] Epoch 5 took 2.3602254390716553 seconds
[INFO 2019-03-16 21:25:21,124 cleverhans] Epoch 6 took 2.3867573738098145 seconds
[INFO 2019-03-16 21:25:23,666 cleverhans] Epoch 7 took 2.334226369857788 seconds
[INFO 2019-03-16 21:25:26,289 cleverhans] Epoch 8 took 2.419886827468872 seconds
[INFO 2019-03-16 21:25:28,859 cleverhans] Epoch 9 took 2.362948179244995 seconds


Generating new data points:
BATCH #1
Substitute training epoch:
num_devices:  1


[INFO 2019-03-16 21:26:02,487 cleverhans] Epoch 0 took 4.759352684020996 seconds
[INFO 2019-03-16 21:26:07,546 cleverhans] Epoch 1 took 4.596355676651001 seconds
[INFO 2019-03-16 21:26:12,837 cleverhans] Epoch 2 took 4.844956159591675 seconds
[INFO 2019-03-16 21:26:17,886 cleverhans] Epoch 3 took 4.580094814300537 seconds
[INFO 2019-03-16 21:26:23,081 cleverhans] Epoch 4 took 4.746647119522095 seconds
[INFO 2019-03-16 21:26:28,102 cleverhans] Epoch 5 took 4.555025339126587 seconds
[INFO 2019-03-16 21:26:33,244 cleverhans] Epoch 6 took 4.69248628616333 seconds
[INFO 2019-03-16 21:26:38,246 cleverhans] Epoch 7 took 4.541560173034668 seconds
[INFO 2019-03-16 21:26:43,306 cleverhans] Epoch 8 took 4.613022089004517 seconds
[INFO 2019-03-16 21:26:48,218 cleverhans] Epoch 9 took 4.446341276168823 seconds


Generating new data points:
BATCH #2
Substitute training epoch:
num_devices:  1


[INFO 2019-03-16 21:27:56,884 cleverhans] Epoch 0 took 9.864603042602539 seconds
[INFO 2019-03-16 21:28:07,457 cleverhans] Epoch 1 took 9.241704940795898 seconds
[INFO 2019-03-16 21:28:18,605 cleverhans] Epoch 2 took 9.824736833572388 seconds
[INFO 2019-03-16 21:28:29,089 cleverhans] Epoch 3 took 9.152734994888306 seconds
[INFO 2019-03-16 21:28:39,621 cleverhans] Epoch 4 took 9.204599142074585 seconds
[INFO 2019-03-16 21:28:50,185 cleverhans] Epoch 5 took 9.226829051971436 seconds
[INFO 2019-03-16 21:29:00,676 cleverhans] Epoch 6 took 9.159837245941162 seconds
[INFO 2019-03-16 21:29:11,309 cleverhans] Epoch 7 took 9.299407958984375 seconds
[INFO 2019-03-16 21:29:21,921 cleverhans] Epoch 8 took 9.270481586456299 seconds
[INFO 2019-03-16 21:29:32,437 cleverhans] Epoch 9 took 9.182785987854004 seconds


Generating new data points:
BATCH #3
Substitute training epoch:
num_devices:  1


[INFO 2019-03-16 21:31:56,250 cleverhans] Epoch 0 took 22.55489444732666 seconds
[INFO 2019-03-16 21:32:18,607 cleverhans] Epoch 1 took 18.77799940109253 seconds
[INFO 2019-03-16 21:32:44,715 cleverhans] Epoch 2 took 23.122028827667236 seconds
[INFO 2019-03-16 21:33:06,278 cleverhans] Epoch 3 took 18.69193983078003 seconds
[INFO 2019-03-16 21:33:28,658 cleverhans] Epoch 4 took 19.54316258430481 seconds
[INFO 2019-03-16 21:33:50,160 cleverhans] Epoch 5 took 18.638306140899658 seconds
[INFO 2019-03-16 21:34:11,284 cleverhans] Epoch 6 took 18.297693252563477 seconds
[INFO 2019-03-16 21:34:33,357 cleverhans] Epoch 7 took 19.19094467163086 seconds
[INFO 2019-03-16 21:34:54,969 cleverhans] Epoch 8 took 18.728404998779297 seconds
[INFO 2019-03-16 21:35:16,657 cleverhans] Epoch 9 took 18.81226873397827 seconds


Generating new data points:
BATCH #4
Substitute training epoch:
num_devices:  1


[INFO 2019-03-16 21:40:25,427 cleverhans] Epoch 0 took 45.060179710388184 seconds
[INFO 2019-03-16 21:41:11,936 cleverhans] Epoch 1 took 39.94614768028259 seconds
[INFO 2019-03-16 21:42:02,703 cleverhans] Epoch 2 took 44.491843461990356 seconds
[INFO 2019-03-16 21:42:49,533 cleverhans] Epoch 3 took 40.694437980651855 seconds
[INFO 2019-03-16 21:43:34,505 cleverhans] Epoch 4 took 38.393383741378784 seconds
[INFO 2019-03-16 21:44:19,324 cleverhans] Epoch 5 took 38.0654182434082 seconds
[INFO 2019-03-16 21:45:03,716 cleverhans] Epoch 6 took 38.322187185287476 seconds
[INFO 2019-03-16 21:45:47,405 cleverhans] Epoch 7 took 37.57575440406799 seconds
[INFO 2019-03-16 21:46:31,942 cleverhans] Epoch 8 took 38.598925828933716 seconds
[INFO 2019-03-16 21:47:15,561 cleverhans] Epoch 9 took 37.58806490898132 seconds


In [66]:
#                )\         O_._._._A_._._._O         /(               
#                 \`--.___,'=================`.___,--'/                
#                  \`--._.__                 __._,--'/                 
#                    \  ,. l`~~~~~~~~~~~~~~~'l ,.  /                   
#        __            \||(_)!_!_!_.-._!_!_!(_)||/            __       
#        \\`-.__        ||_|____!!_|;|_!!____|_||        __,-'//       
#         \\    `==---='-----------'='-----------`=---=='    //        
#         | `--.                _   _   _                ,--' |        
#         | `--.               / \ / \ / \               ,--' |        
#         | `--.          ~~~ ( R | A | M ) ~~~          ,--' |        
#         | `--.               \_/ \_/ \_/               ,--' |        
#          \  ,.`~~~~~~~~~~~~~             ~~~~~~~~~~~~~',.  /         
#            \||  ____,-------._,-------._,-------.____  ||/           
#             ||\|___!`======="!`======="!`======="!___|/||            
#             || |---||--------||-| | |-!!--------||---| ||            
#   __O_____O_ll_lO_____O_____O|| |'|'| ||O_____O_____Ol_ll_O_____O__  
#   o H o o H o o H o o H o o |-----------| o o H o o H o o H o o H o  
#  ___H_____H_____H_____H____O =========== O____H_____H_____H_____H___ 
#                           /|=============|\                          
# ()______()______()______() '==== +-+ ====' ()______()______()______()
# ||{_}{_}||{_}{_}||{_}{_}/| ===== |_| ===== |\{_}{_}||{_}{_}||{_}{_}||
# ||      ||      ||     / |==== s(   )s ====| \     ||      ||      ||
# ======================()  =================  ()======================
# ----------------------/| ------------------- |\----------------------
#                      / |---------------------| \                     
# -'--'--'           ()  '---------------------'  ()                   
#                    /| ------------------------- |\    --'--'--'      
#        --'--'     / |---------------------------| \    '--'          
#                 ()  |___________________________|  ()           '--'-
#   --'-          /| _______________________________  |\               
#  --' gpyy      / |__________________________________| \           

# ALL HAIL THE RAM GODS
# We used tons of memory in the above step, so delete everything
# we don't need and manually run the GC.

# TODO

del augmented_dataset_inputs
del augmented_dataset_labels
del unlabelled
del labelled_dataset


import gc
gc.collect()

291

In [0]:
adversary_training_labels

array([22, 11, 13, ..., 28,  8, 48])

In [0]:
adversary_test_inputs = adversary_test_set.drop('user', axis='columns')
adversary_test_labels = adversary_test_set['user'] - 1  # keras requires 0 based index

# For some reason cleverhans doesn't detect a GPU when it runs, but our models at the top using
# keras _do_. I think this creates a type mis-match: code running on the GPU uses numpy.float64
# whilst the cleverhans stuff runs on the CPU and extects numpy.float32 (or vica versa).
#   -> This is why this dodgy type conversion exists:
adversary_test_inputs = adversary_test_inputs.values.astype(numpy.float32)
adversary_test_labels = adversary_test_labels.values

## Evaluating Substitute Model

Here we evaluate the substitute against the 95% of the dataset it hasn't seen. First, check it's accuracy against the true labels:

In [0]:
adversary_test_labels_one_hot = keras.utils.to_categorical(adversary_test_labels, num_classes=50)

# Evaluate the substitute model on clean test examples
acc = model_eval(
    tensorflow_session, 
    input_placeholder,
    output_placeholder,
    substitute_predictions,
    adversary_test_inputs,
    adversary_test_labels_one_hot,
    args={'batch_size': 32}
)
acc

0.9593037740874063

Next, send this test dataset into the oracle to get it's predictions. Then, compare the substitute models  predctions against those of the oracle. This is important, as it allows us to measure: how good of an imitatin of the oracle our substitute is.

In [0]:
oracle_predicted_labels = oracle.predict(adversary_test_inputs)

# Evaluate the substitute model on clean test examples
acc = model_eval(
    tensorflow_session, 
    input_placeholder,
    output_placeholder,
    substitute_predictions,
    adversary_test_inputs,
    oracle_predicted_labels,
    args={'batch_size': 32}
)
acc

0.9617866132870557

TODO: How do these two accuracies compare? What does that mean about our substitute? What does that say about our oracle?)

## Inspecting the Synthetic Dataset

Just inspecting the generated dataset. Notes:
  1. Some of the values are negative!
  2. The real dataset has an input range of 0-100. This search technique has found all of them, plus a few on each side.
  3. The augmented dataset has just less than 200,000 data points. That's almost as many as were used to train the oracle.

In [0]:
numpy.unique(adversary_training_inputs)

array([ -4.,  -3.,  -2.,  -1.,   0.,   1.,   2.,   3.,   4.,   5.,   6.,
         7.,   8.,   9.,  10.,  11.,  12.,  13.,  14.,  15.,  16.,  17.,
        18.,  19.,  20.,  21.,  22.,  23.,  24.,  25.,  26.,  27.,  28.,
        29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,  38.,  39.,
        40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.,  49.,  50.,
        51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.,  59.,  60.,  61.,
        62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,  72.,
        73.,  74.,  75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,
        84.,  85.,  86.,  87.,  88.,  89.,  90.,  91.,  92.,  93.,  94.,
        95.,  96.,  97.,  98.,  99., 100., 101.], dtype=float32)

In [0]:
len(adversary_training_inputs)

196032

In [0]:
pandas.DataFrame(adversary_training_inputs[numpy.random.choice(adversary_training_inputs.shape[0], size=20)])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,846,847,848,849,850,851,852,853,854,855
0,-3.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-3.0,1.0,...,2.0,1.0,3.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0
1,2.0,-2.0,0.0,-2.0,-2.0,2.0,2.0,-2.0,0.0,2.0,...,2.0,0.0,0.0,2.0,-2.0,0.0,2.0,0.0,-2.0,0.0
2,1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,...,-1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0
3,-1.0,-1.0,3.0,1.0,3.0,1.0,1.0,-1.0,3.0,1.0,...,-1.0,-1.0,-1.0,-1.0,3.0,-3.0,-1.0,-1.0,1.0,-1.0
4,2.0,2.0,0.0,0.0,0.0,2.0,2.0,0.0,2.0,2.0,...,0.0,-2.0,0.0,2.0,-2.0,-2.0,2.0,0.0,0.0,0.0
5,-1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,-1.0,2.0,...,1.0,1.0,1.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0
6,-3.0,1.0,-1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,3.0,...,-1.0,1.0,-1.0,1.0,-3.0,-1.0,-3.0,-1.0,3.0,1.0
7,-3.0,3.0,-1.0,1.0,-1.0,3.0,-1.0,1.0,1.0,3.0,...,0.0,-3.0,1.0,1.0,-3.0,1.0,-1.0,-1.0,1.0,-1.0
8,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,...,-1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0
9,-1.0,1.0,1.0,3.0,-1.0,3.0,1.0,3.0,1.0,3.0,...,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0


# Crafting Adversarial Examples

First we build an attack using the Fast Gradient Sign method. This attack is then used to generate untargeted adversarial examples for each value in our test set.

In [0]:
# Initialize the Fast Gradient Sign Method (FGSM) attack object.
fgsm_par = {'eps': 1., 'ord': numpy.inf, 'clip_min': 0., 'clip_max': 100.}
fgsm = FastGradientMethod(substitute_ch, sess=tensorflow_session)

In [0]:
# Craft adversarial examples using the substitute
eval_params = {'batch_size': dataset_augmentation_batch_size}
x_adv_sub = fgsm.generate(input_placeholder, **fgsm_par)

Instructions for updating:
Use tf.cast instead.


In [0]:
x_adv_sub

<tf.Tensor 'Identity:0' shape=(?, 856) dtype=float32>

In [0]:
oracle_keras = KerasModelWrapper(oracle)
oracle_fgsm_pred = oracle_keras.get_logits(x_adv_sub)

In [0]:
oracle_fgsm_pred

<tf.Tensor 'model_2/dense_3/BiasAdd:0' shape=(?, 50) dtype=float32>

In [0]:
# Evaluate the accuracy of the "black-box" model on adversarial examples
accuracy = model_eval(
        tensorflow_session,
        input_placeholder,
        output_placeholder,
        oracle_fgsm_pred,
        adversary_test_inputs,
        adversary_test_labels_one_hot,
        args=eval_params
)
print('Test accuracy of oracle on adversarial examples generated '
    'using the substitute: ' + str(accuracy))

Test accuracy of oracle on adversarial examples generated using the substitute: 0.019708073093411455


In the above we have shown that the accuracy of the oracle is reduced to 1.6% when using our minimally changed command vectors! This is an example of an untargeted attack: all we are trying to do is get the oracle to misclassify a datapoint as a user which we believe truly represents a different user.

### Targetting Particular Users

Pick an example command vector which wasn't user 2, then generate a similar example that is classified as user 2. 

We then put that example into the oracle model and check it's classification. If it is classified as user 2, our attempt was successfull.

In [55]:
original_command_vectors = numpy.array([adversary_test_inputs[0]])
original_labels = numpy.array([adversary_test_labels_one_hot[0]])

target_labels = keras.utils.to_categorical(numpy.array([2]), num_classes=50)

fgsm_attack = FastGradientMethod(substitute_ch, sess=tensorflow_session)
fgsm_params = {
    'eps': 1.0,
    'ord': numpy.inf,
    'clip_min': 0.0,
    'clip_max': 100.0,
}
adversarial_examples = fgsm_attack.generate_np(
    original_command_vectors,
    y_target=target_labels,
    **fgsm_params,
)

predicted_labels = oracle.predict(adversarial_examples)

print("original label = {}".format(adversary_test_labels[0]))
print("predicted label = {}".format(numpy.argmax(predicted_labels[0])))
print("prediction certainty = {}".format(numpy.max(predicted_labels[0])))

[INFO 2019-03-16 21:49:34,484 cleverhans] Constructing new graph for attack FastGradientMethod


original label = 43
predicted label = 2
prediction certainty = 0.9980084300041199


Whilst  the above demonstrates the attacks effectiveness with one particular example, targetted against one particular user, we should be sure to evaluate the attacks efffectiveness in general.

In fact, it is most often the case that the fast gradient sign method is able to generate successful adversarial examples based on a particular starting vector, and a particular target user.

We define a function below to take in one particular command vector, and perform a targeted attack against each of the users.

In [0]:
def run_targeted_attack_against_all_users(command_vector, label, attack, attack_params):
    """
    Runs a targeted attack for the given sample against. For each user, we attempt to generate a
    similar command vector to the original, which is classified as that user.
    
    The command prints a summary of the results to stdout, then returns a dataframe containing, 
    for each attack:
      - the original user
      - the targeted user
      - the oracle models prediction against the adversarial example
      - the oracles certainty of that prediction
    """    
    
    # Since we run this once against all users, make 50 replicas of the command vector, and
    # original label:
    original_label_one_hot = keras.utils.to_categorical(label, num_classes=50)
    original_labels = numpy.tile(original_label_one_hot, reps=(50,1))
    
    original_command_vectors = numpy.tile(command_vector, reps=(50, 1))
    
    # Our target labels are the one-hot-encoded values 0, 1, 2, ..., 49:
    target_labels = keras.utils.to_categorical(range(50), num_classes=50)

    attack_params['y_target'] = target_labels
    
    # Apply the attack, generating the adversarial examples:
    adversarial_examples = attack.generate_np(
        original_command_vectors,
        **attack_params,
    )

    # Stick these examples into the oracle, and find out what classification it gives:
    predictions = oracle.predict(adversarial_examples)

    # Format the results into a summary dataframe:
    original_label = pandas.Series(
        numpy.apply_along_axis(numpy.argmax, axis=1, arr=original_labels), # undo one hot encode
        name='Original User',
    )
    target_label = pandas.Series(
        numpy.apply_along_axis(numpy.argmax, axis=1, arr=target_labels), # undo one hot encode
        name='Target User',
    )
    predicted_label = pandas.Series(
        numpy.apply_along_axis(numpy.argmax, axis=1, arr=predictions),  # undo one hot encode
        name='Oracle Prediction',
    )
    prediction_certainty = pandas.Series(
        numpy.apply_along_axis(numpy.max, axis=1, arr=predictions),
        name='Oracle Certainty',
    )

    summary = pandas.concat(
        [
            original_label,
            target_label,
            predicted_label,
            prediction_certainty,
        ],
        axis='columns',
    )
    
    # Count the number of targeted attacks which were succcessful:
    successful_attacks = summary.apply(lambda row: row[1] == row[2], axis='columns').sum()
    
    # Don't count  the original_user -> original_user attack:
    successful_attacks -= 1 
    total_attacks = 49
    
    # Print out a little message to say how we did :)
    print(
        "A targeted attack was successful against {}/{} users (with the given input):"
        .format(successful_attacks, total_attacks)
    )

    return summary

For example, below we take a command vector not yet seen by the substitute model and try to craft adversarial examples targeted at each user. As you can see, the attack is relatively sucessful!

In [88]:
run_targeted_attack_against_all_users(adversary_test_inputs[0], adversary_test_labels[0], fgsm_attack, fgsm_params)

A targeted attack was successful against 6/49 users (with the given input):


Unnamed: 0,Original User,Target User,Oracle Prediction,Oracle Certainty
0,43,0,12,0.856063
1,43,1,26,1.0
2,43,2,2,0.998008
3,43,3,27,1.0
4,43,4,2,0.990114
5,43,5,27,1.0
6,43,6,12,0.999991
7,43,7,27,0.857102
8,43,8,27,1.0
9,43,9,27,1.0


In [0]:
def evaluate_targetted_attack(test_inputs, test_labels, attack):
    # For each test input, duplicate it 50 times, once for each potential target user 0, 1, ... 49
    # Input all of these into the attack and get outputs
    # Count rows for which target_user == oracle_prediction
    # Divide by total rows

### Custom Attack Method

The oracle model uses integers and absolute counts as inputs. In contrast to something like percentage inputs. This is quite succesfull in limiting effectiveness of our attacks, the results of which can  be seen above. 

A common approach to limiting adversarial attacks for image classifiers is to use thresholding. The use of integer inputs serves a simlar purpose in the original model. Simliarly, using absolute values severly limits our search space. When converting a naughty script, we aren't able to remove commands: we need them to perform our evil deeds! If the model took in percentage inputs (as a distribution) we would be able to to lower values by increasing all other values.

This combination makes the original model relatively resistant to standard attack methods. In this section we explore methods of overcoming these restrictions. In particular, we extend an attack originally proposed by Carlini and Wagner. It uses the Adam optimiser and prioritises accuracy over speed. We modify it's optimisation function such that it deprioritise negative perturbations.
  
In contrast to attacks against image classification models, our input does not need to look similar to a human. For this reason, we further modify the optimisation functions not to consider the size of positive perturbations.

Our optimisation functions considers the following most important:
  - no commands should be removed
  - it should classify as strongly as possible to the target class


In [91]:
# Just use CarliniWagnerL2 and see how it does

smm_attack = cleverhans.attacks.MomentumIterativeMethod(substitute_ch, sess=tensorflow_session)
smm_params = {
    'eps': 100.0,
    'eps_iter': 1.0,
    'nb_iter': 100,
    'ord': 2,
    'clip_min': 0.0,
    'clip_max': 100.0,
}

run_targeted_attack_against_all_users(adversary_test_inputs[0], adversary_test_labels[0], smm_attack, smm_params)

[INFO 2019-03-16 22:40:50,201 cleverhans] Constructing new graph for attack MomentumIterativeMethod


A targeted attack was successful against 29/49 users (with the given input):


Unnamed: 0,Original User,Target User,Oracle Prediction,Oracle Certainty
0,43,0,0,0.874502
1,43,1,1,1.0
2,43,2,2,1.0
3,43,3,3,1.0
4,43,4,4,1.0
5,43,5,5,1.0
6,43,6,6,1.0
7,43,7,27,0.857762
8,43,8,8,1.0
9,43,9,9,1.0


In [0]:
# TODO: override CarliniWagnerL2 and stop negative perturbations

class AdditivePerturbationsAttack(CarliniWagnerL2):
    

# An End-to-End Attack

In this section, we use the models and attacks developed above to perform a complete attack on the intrusion detection system.

First we define two functions:
  1. `script_to_command_vector` :: converts a list of bash commands into a command vector (as if it werre generated by `acct`).
  2. `pad_script` :: takes an input script and generats an output script with the same behaviour, but with the command counts specified by command_vector.

In [0]:
def script_to_command_vector(script):
    lines = script.split("\n")  # ['netscape', 'sh ./my-script.sh', ...]
    commands = [
        line.split(" ")[0] for line in lines
    ]  # ['netscape', 'sh', ...]
    
    commands = pandas.Series(commands).astype(command_dtype)
    commands_one_hot = pandas.get_dummies(commands)
    command_counts = commands_one_hot.sum()
    
    return command_counts

In [0]:
# For our proof-of-concept, just append --help to turn our commands into no-ops. This won't
# actually work for all of these commands, but proves the point.
COMMAND_TO_NOOP = {command: command + " --help" for command in commands}

def pad_script(original_script, target_command_counts):
    # First, calculate the command counts of the input script:
    original_command_counts = script_to_command_vector(original_script)
    
    # Find the number of each command we need to pad by:
    additional_command_counts = target_command_counts - original_command_counts
    
    # Loop over additional_command_counts and append no-op commands for each additional
    # command needed:
    
    # TODO: finish this
    padded_script = original_script
    
    return padded_script

In [0]:
def masq(script, target_user):
    command_vector = script_to_command_vector(script)
    original_command_vectors = numpy.array([command_vector])

    target_labels = keras.utils.to_categorical(numpy.array([target_user]), num_classes=50)

    attack = FastGradientMethod(substitute_ch, sess=tensorflow_session)
    adversarial_examples = attack.generate_np(
        original_command_vectors,
        y_target=target_labels,
        eps=1.0,
        ord=numpy.inf,
        clip_min=0.0,
        clip_max=100.0,
    )

    predicted_labels = oracle.predict(adversarial_examples)
    
    adversarial_example = adversarial_examples[0]
    predicted_label = numpy.argmax(predicted_labels[0])
    
    if predicted_label == target_user:
        fool = 'We have fooled the model!'
    else:
        fool = 'We have failed to fool the model... oh dear.'
    
    print('These are the adversarial example that was generated: \n')
    print(str(adversarial_example))
    print('\n and this is the predicted user: \n')
    print(str(predicted_label))
    print('\n '+fool)


masq(
    script="""
        cat
        hostname
        awk
        stty
        tset
        sh
        chmod
        chmod
        chmod
        chmod
        news
        sh ./my-script.sh
        netstat
        netscape
        netscape
        netscape
        netscape
        netscape
        netscape
    """,
    target_user=16,    
    )