In [1]:
from scapy.all import *
import numpy as np

filter by:
- tcp packets
- egressing packets
- source ip

In [4]:
# Capture file read by the sniff() call below; the path is relative to the
# notebook's working directory.
pcapfile = 'dataset/raw/attack_capture.pcap'

In [5]:
# Packets grouped by source IP: maps a source address string to the list of
# packets seen from it. Filled by store_pkt(), which appends, so this cell
# must be re-run to start from an empty dict before sniffing again.
packets = dict()

In [6]:
def store_pkt(pkt):
    """Append ``pkt`` to the module-level ``packets`` dict under its source IP.

    The list for a source address is created the first time that address is
    seen. The packet must carry an ``IP`` layer, because ``pkt[IP]`` is
    accessed unconditionally.
    """
    src_ip = pkt[IP].src
    # `packets` is only mutated, never rebound, so no `global` statement is needed.
    packets.setdefault(src_ip, []).append(pkt)

In [7]:
# Monitored network in CIDR notation; it is substituted into the capture
# filter as "src net <network>" so only packets egressing from it are kept.
network = '10.0.0.64/26'

In [9]:
# BPF filter syntax reference: https://biot.com/capstats/bpf.html
# Read the capture file and hand every matching packet to store_pkt.
# NOTE: store_pkt appends to the global `packets`, so re-running this cell
# without re-creating `packets` first adds the same packets again.
sniff(
    offline=pcapfile,
    store=False,  # sniff itself keeps no packets (its summary shows zeros); store_pkt collects them
    count=1000,  # NOTE(review): presumably caps the number of packets processed - confirm this is intended
    filter="tcp and src net {net}".format(net=network),  # TCP packets whose source address is in `network`
    prn=store_pkt
)

<Sniffed: TCP:0 UDP:0 ICMP:0 Other:0>

In [10]:
all_hosts = packets.keys()
all_hosts

dict_keys(['10.0.0.67'])

In [11]:
pkt = packets['10.0.0.67'][0]

In [12]:
pkt.show()

###[ Ethernet ]### 
  dst       = c2:02:64:e8:00:00
  src       = fe:f5:26:b1:c2:41
  type      = IPv4
###[ IP ]### 
     version   = 4
     ihl       = 5
     tos       = 0x0
     len       = 60
     id        = 61713
     flags     = DF
     frag      = 0
     ttl       = 64
     proto     = tcp
     chksum    = 0x3361
     src       = 10.0.0.67
     dst       = 10.2.2.5
     \options   \
###[ TCP ]### 
        sport     = 52806
        dport     = http
        seq       = 2010281419
        ack       = 0
        dataofs   = 10
        reserved  = 0
        flags     = S
        window    = 64240
        chksum    = 0x2d6c
        urgptr    = 0
        options   = [('MSS', 1460), ('SAckOK', b''), ('Timestamp', (3820972389, 0)), ('NOP', None), ('WScale', 7)]



In [13]:
float(pkt.time)

1624315639.847212

sort packets by timestamp

In [14]:
# Put every host's packet list in chronological order (ascending pkt.time);
# samples() walks each list front to back and relies on this ordering.
for host in packets:
    packets[host] = sorted(packets[host], key=lambda p: p.time)

extract data from packets

for each source ip (host):
- get observation windows (sequential or sliding?)

In [15]:
# Observation window length. It is passed to slidingObsWindow() as a number
# of samples, so it equals seconds only while samples() is called with
# sample_interval=1.
T = 15 # observation window in seconds (10-30)

In [16]:
print(packets['10.0.0.67'][0].time)
start_time = np.floor(packets['10.0.0.67'][0].time)
start_time

1624315639.847212


1624315639

In [17]:
print(packets['10.0.0.67'][-1].time)
end_time = np.ceil(packets['10.0.0.67'][-1].time)
end_time

1624315690.763148


1624315691

In [18]:
np.arange(start_time, end_time, 1)

array([1624315639, 1624315640, 1624315641, 1624315642, 1624315643,
       1624315644, 1624315645, 1624315646, 1624315647, 1624315648,
       1624315649, 1624315650, 1624315651, 1624315652, 1624315653,
       1624315654, 1624315655, 1624315656, 1624315657, 1624315658,
       1624315659, 1624315660, 1624315661, 1624315662, 1624315663,
       1624315664, 1624315665, 1624315666, 1624315667, 1624315668,
       1624315669, 1624315670, 1624315671, 1624315672, 1624315673,
       1624315674, 1624315675, 1624315676, 1624315677, 1624315678,
       1624315679, 1624315680, 1624315681, 1624315682, 1624315683,
       1624315684, 1624315685, 1624315686, 1624315687, 1624315688,
       1624315689, 1624315690])

In [19]:
# arrange packets into fixed-length time intervals (1 second by default)
def samples(packets, sample_interval=1):
    """Bucket time-sorted packets into consecutive fixed-length intervals.

    Parameters
    ----------
    packets : sequence
        Objects exposing a numeric ``time`` attribute, sorted by ascending
        ``time``.
    sample_interval : int or float, default 1
        Interval length, in the same unit as ``time``. Must be positive.

    Returns
    -------
    list of list
        One list per interval, in chronological order. Interval ``i`` covers
        ``[start + i * sample_interval, start + (i + 1) * sample_interval)``,
        where ``start`` is the first packet's time rounded down to a whole
        unit. Intervals without packets are kept as empty lists. An empty
        input yields an empty list.

    Raises
    ------
    ValueError
        If ``sample_interval`` is not positive.
    """
    if sample_interval <= 0:
        raise ValueError("sample_interval must be positive")

    num_packets = len(packets)
    if num_packets == 0:
        return []

    # Align the first interval on a whole time unit, as before.
    start_time = float(np.floor(packets[0].time))

    packet_samples = []
    packet_idx = 0
    interval_idx = 0
    # Keep opening intervals until every packet has been placed. This keeps a
    # last packet that lies exactly on an interval boundary, which a
    # half-open arange(start, ceil(last)) range would drop.
    while packet_idx < num_packets:
        # Multiply instead of accumulating to avoid float drift, and do not
        # truncate the bound so fractional intervals are honoured.
        interval_end = start_time + (interval_idx + 1) * sample_interval
        sample = []
        while packet_idx < num_packets and packets[packet_idx].time < interval_end:
            sample.append(packets[packet_idx])
            packet_idx += 1
        packet_samples.append(sample)
        interval_idx += 1

    return packet_samples

In [20]:
# Bucket the (time-sorted) packets of host 10.0.0.67 into intervals of
# length 1, in the unit of pkt.time; `tmp` is a list with one packet list
# per interval.
tmp = samples(packets['10.0.0.67'], sample_interval=1)

In [21]:
for k in tmp:
    print(len(k))

14
76
14
0
0
0
0
64
57
39
40
38
40
39
39
40
33
0
0
0
0
13
13
0
0
0
0
2
20
19
20
203
32
29
59
54
37
6
0
0
0
0
0
0
0
0
0
0
0
0
31
29


In [None]:
np.arange(0, 9, 4)

In [None]:
len(tmp[0:0+4])

In [22]:
def seqObsWindow(data, lengthObsWindow):
    """Split ``data`` into back-to-back, non-overlapping observation windows.

    Each window is a slice of ``lengthObsWindow`` consecutive items. A
    trailing slice shorter than ``lengthObsWindow`` is discarded.

    Returns a list of the full-length windows, in order.
    """
    starts = np.arange(0, len(data), lengthObsWindow)
    candidates = (data[begin:begin + lengthObsWindow] for begin in starts)
    # Only complete windows are kept.
    return [window for window in candidates if len(window) == lengthObsWindow]

In [None]:
j = seqObsWindow(tmp, 4)
print(len(tmp))
print(len(j[0]))
print(len(j))

In [None]:
print(len(j[1][0]))

In [23]:
def slidingObsWindow(data, lengthObsWindow, slidingValue):
    """Cut ``data`` into observation windows that advance by ``slidingValue``.

    Window starts are ``0, slidingValue, 2 * slidingValue, ...``. Each window
    is a slice of ``lengthObsWindow`` consecutive items, so windows overlap
    when ``slidingValue`` is smaller than ``lengthObsWindow``. Slices that run
    past the end of ``data`` and come out short are discarded.

    Returns a list of the full-length windows, in order.
    """
    starts = np.arange(0, len(data), slidingValue)
    candidates = (data[begin:begin + lengthObsWindow] for begin in starts)
    # Only complete windows are kept.
    return [window for window in candidates if len(window) == lengthObsWindow]

In [None]:
k = slidingObsWindow(tmp, 3, 2)
print(len(tmp))
print(len(k[2]))
print(len(k))

In [None]:
print(len(k[3][2]))

In [25]:
# Build overlapping observation windows over the per-interval samples: each
# window holds T consecutive samples and successive windows start 5 samples
# apart. Windows that would run past the end of `tmp` are dropped.
res = slidingObsWindow(tmp, T, 5)
len(res)

8

for each observation window:
- Number of TCP packets
- Number of TCP packet *segments*
- Mean TCP packet length
- Variance TCP packet length
- Shannon entropy of TCP destination IP
- Shannon entropy of TCP destination port
- total silence time
- average silence time sequence
- variance silence time sequence

In [27]:
for sample in res[0]:
    print(len(sample))

14
76
14
0
0
0
0
64
57
39
40
38
40
39
39


Normalize features