In [10]:
import glob
import tensorflow as tf 
import numpy as np
import pandas as pd
from scapy.all import *
import seaborn as sns
sns.set(color_codes=True)
%matplotlib inline

In [6]:
pcap = rdpcap("/Users/kyletopasna/Documents/hunter/pcap/1548216696.814641.pcap")

In [7]:
pcap

<1548216696.814641.pcap: TCP:119 UDP:38 ICMP:0 Other:7>

In [8]:
# Retrieving a single item from packet list
ethernet_frame = pcap[0]
ip_packet = ethernet_frame.payload
segment = ip_packet.payload
data = segment.payload # Retrieve payload that comes after layer 4

# Observe that we just popped off previous layer header
print(ethernet_frame.summary())
print(ip_packet.summary())
print(segment.summary())
print(data.summary()) # If blank, empty object

# Complete depiction of paket
## Achieving understanding that these are the fields will enable the ability 
## to ask the data more meaningful questions ie) type of layer 4 segment is defined in layer 3 packet
ethernet_frame.show()

802.3 38:8b:59:c5:da:3f > 01:80:c2:00:00:00 / LLC / STP
LLC / STP
STP

###[ 802.3 ]### 
  dst       = 01:80:c2:00:00:00
  src       = 38:8b:59:c5:da:3f
  len       = 38
###[ LLC ]### 
     dsap      = 0x42
     ssap      = 0x42
     ctrl      = 3
###[ Spanning Tree Protocol ]### 
        proto     = 0
        version   = 0
        bpdutype  = 0
        bpduflags = 0
        rootid    = 31488
        rootmac   = 38:8b:59:c5:da:3d
        pathcost  = 0
        bridgeid  = 31488
        bridgemac = 38:8b:59:c5:da:3d
        portid    = 32772
        age       = 0.0
        maxage    = 6.0
        hellotime = 2.0
        fwddelay  = 3.0



Test code for organizing numerical information from packets in a pandas dataframe.

Ref : https://secdevops.ai/learning-packet-analysis-with-data-science-5356a3340d4e

In [11]:
# Collect field names from IP/TCP/UDP (These will be columns in DF)
ip_fields = [field.name for field in IP().fields_desc]
tcp_fields = [field.name for field in TCP().fields_desc]
udp_fields = [field.name for field in UDP().fields_desc]

dataframe_fields = ip_fields + ['time'] + tcp_fields + ['payload','payload_raw','payload_hex']

# Create blank DataFrame
df = pd.DataFrame(columns=dataframe_fields)
for packet in pcap[IP]:
    # Field array for each row of DataFrame
    field_values = []
    # Add all IP fields to dataframe
    for field in ip_fields:
        if field == 'options':
            # Retrieving number of options defined in IP Header
            field_values.append(len(packet[IP].fields[field]))
        else:
            field_values.append(packet[IP].fields[field])
    
    field_values.append(packet.time)
    
    layer_type = type(packet[IP].payload)
    for field in tcp_fields:
        try:
            if field == 'options':
                field_values.append(len(packet[layer_type].fields[field]))
            else:
                field_values.append(packet[layer_type].fields[field])
        except:
            field_values.append(None)
    
    # Append payload
    field_values.append(len(packet[layer_type].payload))
    field_values.append(packet[layer_type].payload.original)
    field_values.append(binascii.hexlify(packet[layer_type].payload.original))
    # Add row to DF
    df_append = pd.DataFrame([field_values], columns=dataframe_fields)
    df = pd.concat([df, df_append], axis=0)

# Reset Index
df = df.reset_index()
# Drop old index column
df = df.drop(columns="index")

In [12]:
df.head()

Unnamed: 0,version,ihl,tos,len,id,flags,frag,ttl,proto,chksum,...,dataofs,reserved,flags.1,window,chksum.1,urgptr,options,payload,payload_raw,payload_hex
0,4,5,0,91,61340,,0,255,17,54331,...,,,,,762,,,63,b'\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00...,b'00000000000100000000000012416c656b7a616e6472...
1,4,5,0,169,0,DF,0,64,6,3283,...,8.0,0.0,PA,2048.0,1535,0.0,3.0,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'17030300700000000000000590887b2f38441f44547d...
2,4,5,0,169,57133,DF,0,64,6,11685,...,8.0,0.0,PA,277.0,46358,0.0,3.0,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'1703030070000000000000058ecb8c1f43bfac224984...
3,4,5,0,52,0,DF,0,64,6,3400,...,8.0,0.0,A,2046.0,41450,0.0,3.0,0,b'',b''
4,4,5,0,115,0,DF,0,64,6,10725,...,8.0,0.0,PA,2048.0,52217,0.0,3.0,63,b'\x17\x03\x03\x00:\x00\x00\x00\x00\x00\x00\x0...,b'170303003a00000000000002cffad9981bef8e74efb8...


In [13]:
df.columns

Index(['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto',
       'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq',
       'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr',
       'options', 'payload', 'payload_raw', 'payload_hex'],
      dtype='object')

In [18]:
df[["src","sport","dst","dport"]]

Unnamed: 0,src,sport,dst,dport
0,192.168.86.21,5353,224.0.0.251,5353
1,192.168.86.21,60604,192.168.86.22,8009
2,192.168.86.22,8009,192.168.86.21,60604
3,192.168.86.21,60604,192.168.86.22,8009
4,192.168.86.21,60832,34.212.215.14,443
5,192.168.86.21,12480,192.168.86.1,53
6,34.212.215.14,443,192.168.86.21,60832
7,192.168.86.21,60832,34.212.215.14,443
8,192.168.86.21,49170,192.168.86.1,53
9,192.168.86.26,49154,255.255.255.255,6666


We just saw a bunch of packets flowing back and forth, but it's hard to pick out individual streams. Here we present a function that builds bidirectional flows.

ref: https://pen-testing.sans.org/blog/2017/10/13/scapy-full-duplex-stream-reassembly

In [21]:
def get_sessions(packet):
    sess = "Other"
    if "Ether" in packet:
        if "IP" in packet:
            if "TCP" in packet:
                sess = str(sorted(["TCP", packet["IP"].src, packet["TCP"].sport,
                                  packet["IP"].dst, packet["TCP"].dport], key=str))
            elif "UDP" in packet:
                sess = str(sorted(["UDP", packet["IP"].src, packet["UDP"].sport,
                                  packet["IP"].dst, packet["UDP"].dport], key=str))
            elif "ICMP" in packet:
                sess = str(sorted(["ICMP", packet["IP"].src, packet["IP"].dst,
                                  packet["ICMP"].code, packet["ICMP"].type, packet["ICMP"].id], key=str))
            else:
                sess = str(sorted(["IP", packet["IP"].src, packet["IP"].dst,
                                  packet["IP"].proto], key=str))
        elif "ARP" in packet:
            sess = str(sorted(["ARP", packet["ARP"].psrc, packet["ARP"].pdst], key=str))
        else:
            sess = packet.sprintf("Ethernet type = %04xr,Ether.type%")
    return sess

In [28]:
bi_directional_sessions = pcap.sessions(get_sessions)
len(bi_directional_sessions)

23

In [29]:
for flow in bi_directional_sessions:
    print(flow)

Other
['192.168.86.21', '224.0.0.251', 5353, 5353, 'UDP']
Ethernet type = 86dd
['192.168.86.21', '192.168.86.22', 60604, 8009, 'TCP']
['192.168.86.21', '34.212.215.14', 443, 60832, 'TCP']
[12480, '192.168.86.1', '192.168.86.21', 53, 'UDP']
['192.168.86.1', '192.168.86.21', 49170, 53, 'UDP']
['192.168.86.26', '255.255.255.255', 49154, 6666, 'UDP']
['192.168.86.21', 443, '52.84.19.49', 63068, 'TCP']
['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']
['104.115.156.95', '192.168.86.21', 443, 61432, 'TCP']
['192.168.86.21', 5228, 60605, '74.125.20.188', 'TCP']
['192.168.86.1', '192.168.86.21', 'ARP']
[1900, '192.168.86.249', '239.255.255.250', 44673, 'UDP']
['162.254.195.45', '192.168.86.21', 27018, 52731, 'UDP']
['192.168.86.21', 443, 63066, '64.71.168.222', 'TCP']
[1900, '192.168.86.1', '239.255.255.250', 50979, 'UDP']
['192.168.86.21', '34.232.24.250', 443, 63058, 'TCP']
['162.125.34.129', '192.168.86.21', 443, 62944, 'TCP']
['192.168.86.21', 443, 63070, '64.4.54.254', 'TCP']
[17500,

In [36]:
bi_directional_sessions

{'Ethernet type = 86dd': <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 'Other': <PacketList: TCP:0 UDP:0 ICMP:0 Other:5>,
 "['100.26.125.169', '192.168.86.21', 443, 60700, 'TCP']": <PacketList: TCP:4 UDP:0 ICMP:0 Other:0>,
 "['104.115.156.95', '192.168.86.21', 443, 61432, 'TCP']": <PacketList: TCP:4 UDP:0 ICMP:0 Other:0>,
 "['162.125.34.129', '192.168.86.21', 443, 62944, 'TCP']": <PacketList: TCP:6 UDP:0 ICMP:0 Other:0>,
 "['162.254.195.45', '192.168.86.21', 27018, 52731, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']": <PacketList: TCP:26 UDP:0 ICMP:0 Other:0>,
 "['192.168.86.1', '192.168.86.21', 'ARP']": <PacketList: TCP:0 UDP:0 ICMP:0 Other:2>,
 "['192.168.86.1', '192.168.86.21', 49170, 53, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 "['192.168.86.21', '192.168.86.22', 60604, 8009, 'TCP']": <PacketList: TCP:6 UDP:0 ICMP:0 Other:0>,
 "['192.168.86.21', '224.0.0.251', 5353, 5353, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0

In [37]:
bi_directional_sessions[ "['100.26.125.169', '192.168.86.21', 443, 60700, 'TCP']"]

<PacketList: TCP:4 UDP:0 ICMP:0 Other:0>

In [54]:
# Collect field names from IP/TCP/UDP (These will be columns in DF)

def build_dataframe(packet_list):

    ip_fields = [field.name for field in IP().fields_desc]
    tcp_fields = [field.name for field in TCP().fields_desc]
    udp_fields = [field.name for field in UDP().fields_desc]

    dataframe_fields = ip_fields + ['time'] + tcp_fields + ["size", 'payload','payload_raw','payload_hex']

    # Create blank DataFrame
    df = pd.DataFrame(columns=dataframe_fields)
    for packet in pcap[IP]:
        # Field array for each row of DataFrame
        field_values = []
        # Add all IP fields to dataframe
        for field in ip_fields:
            if field == 'options':
                # Retrieving number of options defined in IP Header
                field_values.append(len(packet[IP].fields[field]))
            else:
                field_values.append(packet[IP].fields[field])

        field_values.append(packet.time)

        layer_type = type(packet[IP].payload)
        for field in tcp_fields:
            try:
                if field == 'options':
                    field_values.append(len(packet[layer_type].fields[field]))
                else:
                    field_values.append(packet[layer_type].fields[field])
            except:
                field_values.append(None)

        # Append payload
        field_values.append(len(packet[layer_type].payload))
        field_values.append(packet[layer_type].payload.original)
        field_values.append(binascii.hexlify(packet[layer_type].payload.original))
        # Add row to DF
        df_append = pd.DataFrame([field_values], columns=dataframe_fields)
        df = pd.concat([df, df_append], axis=0)

    # Reset Index
    df = df.reset_index()
    # Drop old index column
    df = df.drop(columns="index")
    return df

In [55]:
test_flow = build_dataframe(bi_directional_sessions[ "['100.26.125.169', '192.168.86.21', 443, 60700, 'TCP']"])

AssertionError: 29 columns passed, passed data had 28 columns

In [52]:
test_flow

Unnamed: 0,version,ihl,tos,len,id,flags,frag,ttl,proto,chksum,...,dataofs,reserved,flags.1,window,chksum.1,urgptr,options,payload,payload_raw,payload_hex
0,4,5,0,91,61340,,0,255,17,54331,...,,,,,762,,,63,b'\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00...,b'00000000000100000000000012416c656b7a616e6472...
1,4,5,0,169,0,DF,0,64,6,3283,...,8,0,PA,2048,1535,0,3,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'17030300700000000000000590887b2f38441f44547d...
2,4,5,0,169,57133,DF,0,64,6,11685,...,8,0,PA,277,46358,0,3,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'1703030070000000000000058ecb8c1f43bfac224984...
3,4,5,0,52,0,DF,0,64,6,3400,...,8,0,A,2046,41450,0,3,0,b'',b''
4,4,5,0,115,0,DF,0,64,6,10725,...,8,0,PA,2048,52217,0,3,63,b'\x17\x03\x03\x00:\x00\x00\x00\x00\x00\x00\x0...,b'170303003a00000000000002cffad9981bef8e74efb8...
5,4,5,0,55,44531,,0,64,17,40795,...,,,,,20707,,,27,b'\x05W\x01\x00\x00\x01\x00\x00\x00\x00\x00\x0...,b'05570100000100000000000005736c61636b03636f6d...
6,4,5,0,115,7472,DF,0,41,6,9141,...,8,0,PA,9,54718,0,3,63,"b""\x17\x03\x03\x00:[\xb8@\x91>\xcb\xf0\xf6\xbf...",b'170303003a5bb840913ecbf0f6bfa08eb14f827aeb59...
7,4,5,0,52,0,DF,0,64,6,10788,...,8,0,A,2047,12515,0,3,0,b'',b''
8,4,5,0,70,34278,,0,64,17,51033,...,,,,,28062,,,42,b'\x07\x80\x01\x00\x00\x01\x00\x00\x00\x00\x00...,b'0780010000010000000000000d6e6f74696669636174...
9,4,5,0,203,59728,,0,255,17,47886,...,,,,,214,,,175,b'\x00\x00U\xaa\x00\x00\x00\x00\x00\x00\x00\x0...,b'000055aa00000000000000000000009f000000007b22...


In [53]:
test_flow.columns

Index(['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto',
       'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq',
       'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr',
       'options', 'payload', 'payload_raw', 'payload_hex'],
      dtype='object')

In [48]:
def get_flow_duration(df):
    idx = df.columns.get_loc("time")
    return df.iloc[-1, idx] - test_flow.iloc[0,idx]

In [49]:
get_flow_duration(test_flow)

9.168694019317627

In [None]:
def 