In [4]:
%load_ext autoreload
%autoreload 2

import os
from datetime import datetime
import pprint
import glob
import logging
import pickle
import sys
from pprint import pformat

from sacred import Experiment
from sacred.observers import MongoObserver

import pathpy
import numpy as np
import pandas as pd

from scipy.stats import chi2
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker

wupsi


In [5]:
from matplotlib.pyplot import figure

In [6]:
# Locate the project root: starting from the working directory, climb one
# parent at a time until a directory containing "src" is found, giving up
# after max_nest levels.
root_dir = os.curdir
max_nest = 10  # arbitrary, 3 would probably suffice
nest = 0
while "src" not in os.listdir(root_dir) and nest < max_nest:
    # Look up the directory structure for a src directory
    root_dir = os.path.join(os.pardir, root_dir)
    nest += 1

# Decide by what was actually found rather than by the counter: a "src"
# directory discovered on the very last allowed level also ends the loop with
# nest == max_nest and must not be thrown away.
found_src = "src" in os.listdir(root_dir)

# If no src directory was found, the root directory is the working directory.
root_dir = os.path.abspath(root_dir if found_src else os.curdir)

# Make the packages/scripts below the project root importable
# (e.g. `from src.... import ...`).
if found_src:
    src_dir = root_dir
    # Guard against duplicate entries when this cell is re-run.
    if src_dir not in sys.path:
        sys.path.append(src_dir)

In [7]:
from src.data_processing import generate_temporal_network, get_runs

In [8]:
# Keyword options forwarded to pathpy.visualisation.export_html when the
# temporal networks are written to HTML.
style = {
    'ts_per_frame': 50000,
    'ms_per_frame': 8,
    'look_ahead': 200000,
    'look_behind': 200000,
    'node_size': 15,
    'inactive_edge_width': 2,
    'active_edge_width': 4,
    # Hex colours need 3, 4, 6 or 8 digits; the previous '#00000' had five
    # and is not a valid colour value.
    'label_color': '#000000',
    'label_size': '24px',
    'label_offset': [0, 10],
    'width': 1000,
    'height': 800,
}

In [9]:
# Download URLs of the LID-DS recording archives (.tar.gz), keyed by dataset
# name. Each URL has the form <base>/<folder>/<dataset name>.tar.gz.
_LID_DS_DOWNLOADS = "https://www.exploids.de/lid-ds-downloads"
_DEFAULT_FOLDER = "LID-DS-Recordings-01"
# ZipSlip is the only archive listed under a different folder.
# NOTE(review): confirm this is intentional and not a missing "-01" suffix.
_FOLDER_OVERRIDES = {"ZipSlip": "LID-DS-Recordings"}
# Order matters: later cells iterate over the dict in insertion order.
_DATASET_NAMES = [
    "CVE-2014-0160",
    "PHP_CWE-434",
    "Bruteforce_CWE-307",
    "SQL_Injection_CWE-89",
    "ZipSlip",
    "CVE-2012-2122",
    "CVE-2017-7529",
    "CVE-2018-3760",
    "CVE-2019-5418",
    "EPS_CWE-434",
]
datasets = {
    name: f"{_LID_DS_DOWNLOADS}/{_FOLDER_OVERRIDES.get(name, _DEFAULT_FOLDER)}/{name}.tar.gz"
    for name in _DATASET_NAMES
}

In [10]:
# For every dataset, read its runs.csv and keep one randomly chosen run per
# value of "is_executing_exploit". The fixed random_state keeps the selection
# the same across re-runs.
scenarios = []
for dataset in datasets:
    runs_csv = f"../data/raw/{dataset}/runs.csv"
    all_runs = pd.read_csv(runs_csv, skipinitialspace=True)
    runs_by_exploit_flag = all_runs.groupby("is_executing_exploit")
    scenario = runs_by_exploit_flag.sample(n=1, random_state=1)
    # Shown before the "dataset" column is attached, so the displayed table
    # contains only the columns read from runs.csv.
    display(scenario)
    scenario["dataset"] = dataset
    scenarios.append(scenario)

Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
547,victim_heartbleed:latest,itchy_davinci_4564,False,10,60,-1
639,victim_heartbleed:latest,melted_carson_1329,True,10,35,13


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
1024,fu_victim:latest,ripe_hugle_6081,False,10,60,-1
1097,fu_victim:latest,grumpy_carson_5824,True,10,35,22


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
745,victim_bruteforce:latest,weak_heisenberg_2728,False,10,45,-1
997,victim_bruteforce:latest,nice_almeida_9203,True,10,55,36


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
698,sql_victim:latest,full_payne_5501,False,10,35,-1
1065,sql_victim:latest,polite_wescoff_8069,True,10,50,19


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
241,zipslip_victim,shy_rhodes_1001,False,10,45,-1
1013,zipslip_victim,dirty_blackwell_7369,True,10,40,32


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
119,vulhub/mysql:5.5.23,brief_khayyam_4347,False,10,45,-1
1382,vulhub/mysql:5.5.23,raspy_moore_2582,True,10,30,9


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
864,lid-ds/cve-2017-7529:latest,deep_thompson_5079,False,10,60,-1
717,lid-ds/cve-2017-7529:latest,scruffy_kare_6242,True,10,40,16


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
1009,lid-ds/cve-2018-3760:latest,enough_pike_8024,False,10,40,-1
551,lid-ds/cve-2018-3760:latest,inexpensive_noyce_1736,True,10,55,23


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
473,lid-ds/cve-2019-5418:latest,fat_thompson_8421,False,10,45,-1
1002,lid-ds/cve-2019-5418:latest,vast_maxwell_3286,True,10,40,18


Unnamed: 0,image_name,scenario_name,is_executing_exploit,warmup_time,recording_time,exploit_start_time
517,eps_victim,brave_edison_9580,False,10,55,-1
1024,eps_victim,fluffy_northcutt_5295,True,10,55,28


In [11]:
# Build a temporal network for every sampled run and export it as an HTML
# file named <dataset>_<scenario_name>_<is_executing_exploit>.html.
filename_base = "../reports/figures/"
# Make sure the target folder exists before anything is written into it
# (presumably export_html does not create missing directories itself).
os.makedirs(filename_base, exist_ok=True)
for scenario in scenarios:
    for key, run in scenario.iterrows():
        run_path = f'../data/raw/{run["dataset"]}/{run["scenario_name"]}.txt'
        print(run_path)
        net = generate_temporal_network(run_path)
        # Build the target from filename_base instead of repeating the
        # literal folder, so the output location is defined in one place.
        save_path = f'{filename_base}{run["dataset"]}_{run["scenario_name"]}_{run["is_executing_exploit"]}.html'
        print(save_path)
        # `style` is the dict of keyword options currently bound in the notebook.
        pathpy.visualisation.export_html(net, save_path, **style)
        print(net)

../data/raw/CVE-2014-0160/itchy_davinci_4564.txt
2020-10-09 10:51:59 [Severity.INFO]	Building index data structures ...
2020-10-09 10:51:59 [Severity.INFO]	Sorting time stamps ...
2020-10-09 10:51:59 [Severity.INFO]	finished.
../reports/figures/CVE-2014-0160_itchy_davinci_4564_False.html
Nodes:			41
Time-stamped links:	3814
Links/Nodes:		93.02439024390245
Observation period:	[0, 59407544]
Observation length:	 59407544 
Time stamps:		 3787 
Avg. inter-event dt:	 15691.374537770735
Min/Max inter-event dt:	 1/1001092
../data/raw/CVE-2014-0160/melted_carson_1329.txt
2020-10-09 10:51:59 [Severity.INFO]	Building index data structures ...
2020-10-09 10:51:59 [Severity.INFO]	Sorting time stamps ...
2020-10-09 10:51:59 [Severity.INFO]	finished.
../reports/figures/CVE-2014-0160_melted_carson_1329_True.html
Nodes:			32
Time-stamped links:	3164
Links/Nodes:		98.875
Observation period:	[0, 34086031]
Observation length:	 34086031 
Time stamps:		 3149 
Avg. inter-event dt:	 10827.83703939009
Min/Max 

In [None]:
# Load the pickled paths object for CVE-2017-7529 from the interim data folder.
# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file;
# only load pickles that were produced by this project.
# The context manager closes the file handle, which the bare open() call
# previously left open.
with open("../data/interim/CVE-2017-7529/temp_paths_10.p", "rb") as paths_file:
    paths = pickle.load(paths_file)

In [None]:
# Print the loaded paths object.
print(paths)

# Print the entry stored under key 4 of paths.paths.
# NOTE(review): presumably the key is the path length — confirm against pathpy.
print(paths.paths[4])

In [None]:
# Build a HigherOrderNetwork with k=1 from the loaded paths.
hon_1 = pathpy.HigherOrderNetwork(paths, k=1)
#print(hon_1.transition_matrix())

In [None]:
# Fit a multi-order model with max_order=3 on the loaded paths.
mog = pathpy.MultiOrderModel(paths, max_order=3)
print(mog.paths.paths.keys())
# `order` used to be printed while its assignment was commented out, which
# raises NameError on a fresh kernel (or silently prints a stale value left
# over from an earlier run). Compute it here so the cell is self-contained.
order = mog.estimate_order()
print(order)

In [None]:
# Hand-built set of paths used to query the likelihood of both fitted models.
mypaths = pathpy.Paths()
mypaths.add_path(["open", "stat"])
mypaths.add_path(['epoll_wait', 'epoll_wait', 'recvmsg', 'close', 'recvmsg'])
# The pair stat -> open is added four times.
for _repeat in range(4):
    mypaths.add_path(["stat", "open"])
print(mypaths)
#mypaths.add_path(["futex","epoll_wait"])
# Log-likelihood of the hand-built paths, first under hon_1, then under mog.
for model in (hon_1, mog):
    print(model.likelihood(mypaths, log=True))
# Last expression of the cell, so its value is displayed as the cell output.
hon_1.model_size()

In [None]:
# Print the hand-built paths and their likelihood under the multi-order
# model, requested with log=False.
print(mypaths)
print(mog.likelihood(mypaths, log=False))
# NOTE(review): the value of this call is discarded — a notebook cell only
# displays its last expression. Wrap it in print()/display() if it should be shown.
mog.model_size(0)
# Last expression of the cell: entry 0 of the model's transition matrices is displayed.
mog.transition_matrices[0]

In [None]:
# Keyword options for the static network plot; note that this rebinds the
# notebook-level name `style`.
style = dict(
    label_offset=[0, -1],
    label_color='black',
    width=800,
    height=800,
)
pathpy.visualisation.plot(hon_1, **style)


# List every edge of hon_1 together with its 'weight' attribute.
for edge in hon_1.edges:
    edge_weight = hon_1.edges[edge]['weight']
    print(edge, edge_weight)
    

In [None]:
# Build a HigherOrderNetwork with k=2 (null_model=False) from the loaded
# paths and plot it with the `style` options currently bound in the notebook.
hon_2 = pathpy.HigherOrderNetwork(paths, k=2, null_model=False)
pathpy.visualisation.plot(hon_2, **style)

# List every edge of hon_2 together with its 'weight' attribute.
for edge in hon_2.edges:
    edge_weight = hon_2.edges[edge]['weight']
    print(edge, edge_weight)

# Generate the possible paths for hon_2 (second argument 2) and report how many there are.
test_paths = pathpy.HigherOrderNetwork.generate_possible_paths(hon_2, 2)
n_test_paths = len(test_paths)
print(n_test_paths)

In [25]:
# Alternative keyword options for a temporal-network visualisation; rebinds
# the notebook-level name `style`. Only the commented-out plot call below
# refers to it in this cell.
style = {
    'ts_per_frame': 100000,
    'ms_per_frame': 10,
    'look_ahead': 100,
    'look_behind': 100,
    'node_size': 15,
    'inactive_edge_width': 2,
    'active_edge_width': 4,
    # Hex colours need 3, 4, 6 or 8 digits; the previous '#00000' had five
    # and is not a valid colour value.
    'label_color': '#000000',
    'label_size': '24px',
    'label_offset': [0, 10],
    'width': 800,
    'height': 800,
}
#pathpy.visualisation.plot(temp_net_true, **style)