In [1]:
from random import Random, shuffle

import torch
import torch.optim as optim

from aalpy.learning_algs import run_Lstar
from aalpy.oracles import RandomWordEqOracle
from aalpy.utils import load_automaton_from_file

from RNN import get_model, Optimization
from util import conformance_test, RNNSul

from extract_dnp3 import *
from automata_data_generation import AutomatonDataset

In [2]:
# Experiment label; handed to opt.train(exp_name=...) in the training cell.
exp_name = 'dnp3'
# No explicit torch device is selected.
device = None

# Size of the training split and a nominal validation-set size.
num_training_samples, num_val_samples = 400, 155

# Build the sequence data and both alphabets from the captures under 'captures-sample'.
automaton_data, input_al, output_al = generate_dnp3_data('captures-sample')

Working on captures-sample/dnp3-2.pcap
Gathering streams.....
Identified 946 TCP streams!
Iterating over s treams.....
Working on captures-sample/dnp3-1.pcap
Gathering streams.....
Identified 1 TCP streams!
Iterating over s treams.....
Working on captures-sample/dnp3.pcap
Gathering streams.....
Identified 8 TCP streams!
Iterating over s treams.....
Returned training data...


In [3]:
# Display the input alphabet returned by generate_dnp3_data.
input_al

['OPERATE',
 'WARM_RESTART',
 'READ',
 'DISABLE_UNSOLICITED',
 'SELECT',
 'WRITE',
 'COLD_RESTART',
 'ENABLE_UNSOLICITED',
 'STOP_APPL',
 'CONFIRM']

In [4]:
# Display the output alphabet returned by generate_dnp3_data.
# NOTE(review): the recorded output shows this is a set, so its iteration order is
# not guaranteed to be the same across interpreter runs — confirm that
# AutomatonDataset does not rely on that order when mapping outputs to class indices.
output_al

{'CONFIRM', 'RESPONSE', 'UNSOLICITED_RESPONSE'}

In [5]:
# Shuffle the samples in place before splitting them into training/validation sets.
# A dedicated, seeded generator keeps the split identical on every fresh
# "Restart & Run All". This matters because the training cell calls
# opt.train(..., load=True): with an unseeded shuffle, a reloaded model could be
# validated on samples it was originally trained on.
shuffle_seed = 0
Random(shuffle_seed).shuffle(automaton_data)

In [6]:
# The first num_training_samples entries are used for training; everything after
# them is held out for validation.
# (num_val_samples is not consulted here — the validation split is simply the remainder.)
training_data = automaton_data[:num_training_samples]
validation_data = automaton_data[num_training_samples:]

In [7]:
# Mini-batch size handed to the dataset helper.
batch_size = 64

# Dataset helper built from both alphabets; it is used in the next cell to create
# the datasets and is also passed to the model via model_params.
data_handler = AutomatonDataset(
    input_al,
    output_al,
    batch_size,
    device=device,
)

In [8]:
# Turn the raw training/validation samples into the dataset objects consumed by opt.train.
train = data_handler.create_dataset(training_data)
val = data_handler.create_dataset(validation_data)

In [9]:
# Setup RNN parameters
model_type = 'gru'
activation_fun = 'relu'  # note that activation_fun value is irrelevant for GRU and LSTM
input_dim = len(input_al)    # one input per symbol of the input alphabet
output_dim = len(output_al)  # one output per symbol of the output alphabet
hidden_dim = 20
layer_dim = 2
dropout = 0  # 0.1 if layer_dim > 1 else 0
n_epochs = 100
# Keep the optimizer class under its own name so `optimizer` below only ever
# refers to the constructed optimizer instance.
optimizer_cls = optim.Adam
learning_rate = 0.0005
weight_decay = 1e-6
early_stop = True  # Stop training if loss is smaller than small threshold for few epochs

# Use the notebook-wide `device` setting (the same one given to AutomatonDataset)
# instead of a hard-coded None, so model, optimization loop and data stay on the
# same device if that setting is ever changed.
model_params = {'input_dim': input_dim,
                'hidden_dim': hidden_dim,
                'layer_dim': layer_dim,
                'output_dim': output_dim,
                'nonlinearity': activation_fun,
                'dropout_prob': dropout,
                'data_handler': data_handler,
                'device': device}

model = get_model(model_type, model_params)

optimizer = optimizer_cls(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

opt = Optimization(model=model, optimizer=optimizer, device=device)

# Name of the hidden-state flattening routine matching the model type
# (not referenced again in the cells shown here).
process_hs_fun = 'flatten_lstm' if model_type == 'lstm' else 'flatten'

# This will train the RNN
# If trained model with same parametrization exists, it will be loaded unless load flag is set to False
opt.train(train, val, n_epochs=n_epochs, exp_name=exp_name, early_stop=early_stop, save=True, load=True)

[1/100] Training loss: 1.1257	 Validation loss: 1.0454	 Accuracy: 76.66%
[2/100] Training loss: 0.9917	 Validation loss: 0.9158	 Accuracy: 96.35%
[3/100] Training loss: 0.8606	 Validation loss: 0.7805	 Accuracy: 94.58%
[4/100] Training loss: 0.7198	 Validation loss: 0.6317	 Accuracy: 94.58%
[5/100] Training loss: 0.5653	 Validation loss: 0.4729	 Accuracy: 94.58%
[6/100] Training loss: 0.4091	 Validation loss: 0.3304	 Accuracy: 94.58%
[7/100] Training loss: 0.2837	 Validation loss: 0.2356	 Accuracy: 94.58%
[8/100] Training loss: 0.2081	 Validation loss: 0.1860	 Accuracy: 94.58%
[9/100] Training loss: 0.1694	 Validation loss: 0.1615	 Accuracy: 94.58%
[10/100] Training loss: 0.1494	 Validation loss: 0.1483	 Accuracy: 94.58%
[15/100] Training loss: 0.1169	 Validation loss: 0.1256	 Accuracy: 94.58%
[20/100] Training loss: 0.1054	 Validation loss: 0.1177	 Accuracy: 94.58%
[25/100] Training loss: 0.0978	 Validation loss: 0.1127	 Accuracy: 94.58%
[30/100] Training loss: 0.0917	 Validation loss

In [10]:
# check the RNN for accuracy on randomly generated data
# (kept disabled: `automaton` is not defined in the cells shown here)
# conformance_test(model, automaton, n_tests=1000, max_test_len=30)

# Disable all gradient computations to speed up execution.
# torch.no_grad() is a context manager: calling it as a bare statement only builds
# the object and leaves gradient tracking on, so the queries must run inside `with`.
with torch.no_grad():
    # wrap RNN in AALpy's SUL interface
    sul = RNNSul(model)
    # this is a weak eq. oracle with weak configuration
    eq_oracle = RandomWordEqOracle(input_al, sul, num_walks=1000, max_walk_len=10)

    # Learn a Mealy machine from the RNN with L*.
    learned_model = run_Lstar(input_al, sul, eq_oracle, 'mealy')

learned_model.visualize()

Hypothesis 1: 6 states.
-----------------------------------
Learning Finished.
Learning Rounds:  1
Number of states: 6
Time (in seconds)
  Total                : 8.49
  Learning algorithm   : 1.35
  Conformance checking : 7.14
Learning Algorithm
 # Membership Queries  : 600
 # MQ Saved by Caching : 10
 # Steps               : 1800
Equivalence Query
 # Membership Queries  : 1000
 # Steps               : 10000
-----------------------------------
Visualization started in the background thread.
Model saved to LearnedModel.pdf.


Start : This command cannot be run due to the error: The system cannot find the file specified.
At line:1 char:1
+ Start "file:///home/lone/UTAStuff/adv-spc-infosec/project/rnn/train-r ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidOperation: (:) [Start-Process], InvalidOperationException
    + FullyQualifiedErrorId : InvalidOperationException,Microsoft.PowerShell.Commands.StartProcessCommand
 
