# Load data

Here we load and preprocess the data from the CSV files.

In [1]:
import modules.patterns as patterns
import tabulate

# Pattern-detector configuration.
# A bigger number means lower precision; the highest precision is 1.
# NOTE: the keyword is spelled `precission` in this call and is kept as-is.
p = patterns.Patterns(precission=100, window_size=100)

folder_loc = "../data/2015/Network"
result_loc = "../output.html"

# Bootstrap from the input folder and render the returned stats as an HTML table.
stats = p.bootstrap(folder_loc)
table = tabulate.tabulate(stats, tablefmt="html")

# Start the report file from scratch ("w" truncates any previous content).
# NOTE(review): relies on tabulate's HTML result exposing a `.str` attribute.
with open(result_loc, "w") as fd:
    fd.write(table.str)

# Keep the table as the cell's LAST expression so the notebook renders it;
# a bare expression in the middle of a cell is evaluated and then discarded.
table


## Detect anomalies from input data

We can get the information about attacks from the SWaT dataset file `data/2015/List_of_attacks_Final.csv`. The attacks are divided into groups:

- Single stage single point (SSSP)
- Single stage multi point (SSMP)
- Multi stage single point (MSSP)
- Multi stage multi point attacks (MSMP)

In [2]:
import modules.load as load

# Spreadsheet with the list of attacks, relative to the notebook's directory.
attacks_file_loc = "../data/2015/List_of_attacks_Final_fixed.xlsx"

# load.anomalies returns a pair; `anomalies` is reused by the processing cell.
(stages, anomalies) = load.anomalies(attacks_file_loc)

# Quick sanity check: report how many anomalies were loaded.
print("Loaded %d anomalies\n" % len(anomalies))

First anomaly detected 2015-12-28T10:29:14
Loaded 35 anomalies



# Process data

Here we process every file to find possible patterns.

In [3]:
import time
from IPython import display

# Take a wall-clock timestamp first so the reported runtime covers the whole cell.
ts_start = time.time()

# Edge case, testing one: a short run over a limited batch of input files.
stats = p.process_all(anomalies, skip_first=414, max_process=10)

# Alternative invocations kept for reference (swap in for the call above):
#   stats = p.process_all(anomalies, skip_first=414, max_process=-1)
#   end of tests:
#   stats = p.process_all(anomalies, skip_first=701, max_process=5)

# Each entry of `stats` is rendered as its own HTML table, shown inline and
# appended to the report file so the file accumulates every table.
for stat in stats:
    table = tabulate.tabulate(stat, tablefmt="html")
    display.display_html(table, raw=True)

    with open(result_loc, mode="a") as fd:
        fd.write(table.str)

# Elapsed time in seconds; "%d" truncates the fractional part.
diff = time.time() - ts_start
print("Runtime %d sec." % diff)


[ 1 / 10 | 414 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part07_sorted.csv
[ 2 / 10 | 415 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part08_sorted.csv
[ 3 / 10 | 416 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part09_sorted.csv
[ 4 / 10 | 417 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part10_sorted.csv
[ 5 / 10 | 418 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part11_sorted.csv
[ 6 / 10 | 419 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part12_sorted.csv
[ 7 / 10 | 420 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part13_sorted.csv
[ 8 / 10 | 421 / 784 ] processing file ../data/2015/Network/2015-12-28_113021_98.log.part14_sorted.csv
[ 9 / 10 | 422 / 784 ] processing file ../data/2015/Network/2015-12-28_164554_99.log.part01_sorted.csv
[ 10 / 10 | 423 / 784 ] processing file ../data/2015/Network/2015-12-28_1

0,1,2,3,4
# rows,# Attack points,Attack points,# Attacked stages,Attacked stages
247775,5,"MV101, LIT101, P102, AIT202, LIT301",3,"P1, P2, P3"


0,1,2,3,4,5
Attack #,Detected,Missed,Detected network requests,Missed network requests,False positive network requests
*,False,False,0,0,47093
1,False,False,0,0,0
2,True,False,28,1495,0
5,True,False,25,575,0


0,1,2,3,4,5,6
Total,Detected,Detection %,Missed,Miss %,False Positive,False Positive %
49216,53,0.11,2070,4.21,47093,95.69


0,1,2,3,4
Total,Detected,Detection %,Missed,Miss %
3,2,66.67,1,33.33


Runtime 96 sec.
