In [1]:
import glob
import tensorflow as tf 
import numpy as np
import pandas as pd
from scapy.all import *
import seaborn as sns
sns.set(color_codes=True)
%matplotlib inline

In [2]:
pcap = rdpcap("/Users/kyletopasna/Documents/hunter/pcap/1548216696.814641.pcap")

In [3]:
pcap

<1548216696.814641.pcap: TCP:119 UDP:38 ICMP:0 Other:7>

In [4]:
# Retrieving a single item from packet list
ethernet_frame = pcap[0]
ip_packet = ethernet_frame.payload
segment = ip_packet.payload
data = segment.payload # Retrieve payload that comes after layer 4

# Observe that we just popped off previous layer header
print(ethernet_frame.summary())
print(ip_packet.summary())
print(segment.summary())
print(data.summary()) # If blank, empty object

# Complete depiction of paket
## Achieving understanding that these are the fields will enable the ability 
## to ask the data more meaningful questions ie) type of layer 4 segment is defined in layer 3 packet
ethernet_frame.show()

802.3 38:8b:59:c5:da:3f > 01:80:c2:00:00:00 / LLC / STP
LLC / STP
STP

###[ 802.3 ]### 
  dst       = 01:80:c2:00:00:00
  src       = 38:8b:59:c5:da:3f
  len       = 38
###[ LLC ]### 
     dsap      = 0x42
     ssap      = 0x42
     ctrl      = 3
###[ Spanning Tree Protocol ]### 
        proto     = 0
        version   = 0
        bpdutype  = 0
        bpduflags = 0
        rootid    = 31488
        rootmac   = 38:8b:59:c5:da:3d
        pathcost  = 0
        bridgeid  = 31488
        bridgemac = 38:8b:59:c5:da:3d
        portid    = 32772
        age       = 0.0
        maxage    = 6.0
        hellotime = 2.0
        fwddelay  = 3.0



Test code for organizing numerical information from packets in a pandas dataframe.

Ref : https://secdevops.ai/learning-packet-analysis-with-data-science-5356a3340d4e

In [5]:
# Collect field names from IP/TCP/UDP (These will be columns in DF)
ip_fields = [field.name for field in IP().fields_desc]
tcp_fields = [field.name for field in TCP().fields_desc]
udp_fields = [field.name for field in UDP().fields_desc]

dataframe_fields = ip_fields + ['time'] + tcp_fields + ['payload','payload_raw','payload_hex']

# Create blank DataFrame
df = pd.DataFrame(columns=dataframe_fields)
for packet in pcap[IP]:
    # Field array for each row of DataFrame
    field_values = []
    # Add all IP fields to dataframe
    for field in ip_fields:
        if field == 'options':
            # Retrieving number of options defined in IP Header
            field_values.append(len(packet[IP].fields[field]))
        else:
            field_values.append(packet[IP].fields[field])
    
    field_values.append(packet.time)
    
    layer_type = type(packet[IP].payload)
    for field in tcp_fields:
        try:
            if field == 'options':
                field_values.append(len(packet[layer_type].fields[field]))
            else:
                field_values.append(packet[layer_type].fields[field])
        except:
            field_values.append(None)
    
    # Append payload
    field_values.append(len(packet[layer_type].payload))
    field_values.append(packet[layer_type].payload.original)
    field_values.append(binascii.hexlify(packet[layer_type].payload.original))
    # Add row to DF
    df_append = pd.DataFrame([field_values], columns=dataframe_fields)
    df = pd.concat([df, df_append], axis=0)

# Reset Index
df = df.reset_index()
# Drop old index column
df = df.drop(columns="index")

In [6]:
df.head()

Unnamed: 0,version,ihl,tos,len,id,flags,frag,ttl,proto,chksum,...,dataofs,reserved,flags.1,window,chksum.1,urgptr,options,payload,payload_raw,payload_hex
0,4,5,0,91,61340,,0,255,17,54331,...,,,,,762,,,63,b'\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00...,b'00000000000100000000000012416c656b7a616e6472...
1,4,5,0,169,0,DF,0,64,6,3283,...,8.0,0.0,PA,2048.0,1535,0.0,3.0,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'17030300700000000000000590887b2f38441f44547d...
2,4,5,0,169,57133,DF,0,64,6,11685,...,8.0,0.0,PA,277.0,46358,0.0,3.0,117,b'\x17\x03\x03\x00p\x00\x00\x00\x00\x00\x00\x0...,b'1703030070000000000000058ecb8c1f43bfac224984...
3,4,5,0,52,0,DF,0,64,6,3400,...,8.0,0.0,A,2046.0,41450,0.0,3.0,0,b'',b''
4,4,5,0,115,0,DF,0,64,6,10725,...,8.0,0.0,PA,2048.0,52217,0.0,3.0,63,b'\x17\x03\x03\x00:\x00\x00\x00\x00\x00\x00\x0...,b'170303003a00000000000002cffad9981bef8e74efb8...


In [7]:
df.columns

Index(['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto',
       'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq',
       'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr',
       'options', 'payload', 'payload_raw', 'payload_hex'],
      dtype='object')

In [8]:
df[["src","sport","dst","dport"]]

Unnamed: 0,src,sport,dst,dport
0,192.168.86.21,5353,224.0.0.251,5353
1,192.168.86.21,60604,192.168.86.22,8009
2,192.168.86.22,8009,192.168.86.21,60604
3,192.168.86.21,60604,192.168.86.22,8009
4,192.168.86.21,60832,34.212.215.14,443
5,192.168.86.21,12480,192.168.86.1,53
6,34.212.215.14,443,192.168.86.21,60832
7,192.168.86.21,60832,34.212.215.14,443
8,192.168.86.21,49170,192.168.86.1,53
9,192.168.86.26,49154,255.255.255.255,6666


We just saw a bunch of packets flowing back and forth, but it's hard to pick out individual streams. Here we present a function that builds bidirectional flows.

ref: https://pen-testing.sans.org/blog/2017/10/13/scapy-full-duplex-stream-reassembly

In [9]:
def get_sessions(packet):
    sess = "Other"
    if "Ether" in packet:
        if "IP" in packet:
            if "TCP" in packet:
                sess = str(sorted(["TCP", packet["IP"].src, packet["TCP"].sport,
                                  packet["IP"].dst, packet["TCP"].dport], key=str))
            elif "UDP" in packet:
                sess = str(sorted(["UDP", packet["IP"].src, packet["UDP"].sport,
                                  packet["IP"].dst, packet["UDP"].dport], key=str))
            elif "ICMP" in packet:
                sess = str(sorted(["ICMP", packet["IP"].src, packet["IP"].dst,
                                  packet["ICMP"].code, packet["ICMP"].type, packet["ICMP"].id], key=str))
            else:
                sess = str(sorted(["IP", packet["IP"].src, packet["IP"].dst,
                                  packet["IP"].proto], key=str))
        elif "ARP" in packet:
            sess = str(sorted(["ARP", packet["ARP"].psrc, packet["ARP"].pdst], key=str))
        else:
            sess = packet.sprintf("Ethernet type = %04xr,Ether.type%")
    return sess

In [66]:
bi_directional_sessions = pcap.sessions(get_sessions)
print(len(bi_directional_sessions))
print(bi_directional_sessions)

23
{'Other': <PacketList: TCP:0 UDP:0 ICMP:0 Other:5>, "['192.168.86.21', '224.0.0.251', 5353, 5353, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>, 'Ethernet type = 86dd': <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>, "['192.168.86.21', '192.168.86.22', 60604, 8009, 'TCP']": <PacketList: TCP:6 UDP:0 ICMP:0 Other:0>, "['192.168.86.21', '34.212.215.14', 443, 60832, 'TCP']": <PacketList: TCP:9 UDP:0 ICMP:0 Other:0>, "[12480, '192.168.86.1', '192.168.86.21', 53, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>, "['192.168.86.1', '192.168.86.21', 49170, 53, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>, "['192.168.86.26', '255.255.255.255', 49154, 6666, 'UDP']": <PacketList: TCP:0 UDP:3 ICMP:0 Other:0>, "['192.168.86.21', 443, '52.84.19.49', 63068, 'TCP']": <PacketList: TCP:18 UDP:0 ICMP:0 Other:0>, "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']": <PacketList: TCP:26 UDP:0 ICMP:0 Other:0>, "['104.115.156.95', '192.168.86.21', 443, 61432, 'TCP']": <PacketList: TCP:4 UDP:0 ICMP:

In [11]:
for flow in bi_directional_sessions:
    print(flow)

Other
['192.168.86.21', '224.0.0.251', 5353, 5353, 'UDP']
Ethernet type = 86dd
['192.168.86.21', '192.168.86.22', 60604, 8009, 'TCP']
['192.168.86.21', '34.212.215.14', 443, 60832, 'TCP']
[12480, '192.168.86.1', '192.168.86.21', 53, 'UDP']
['192.168.86.1', '192.168.86.21', 49170, 53, 'UDP']
['192.168.86.26', '255.255.255.255', 49154, 6666, 'UDP']
['192.168.86.21', 443, '52.84.19.49', 63068, 'TCP']
['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']
['104.115.156.95', '192.168.86.21', 443, 61432, 'TCP']
['192.168.86.21', 5228, 60605, '74.125.20.188', 'TCP']
['192.168.86.1', '192.168.86.21', 'ARP']
[1900, '192.168.86.249', '239.255.255.250', 44673, 'UDP']
['162.254.195.45', '192.168.86.21', 27018, 52731, 'UDP']
['192.168.86.21', 443, 63066, '64.71.168.222', 'TCP']
[1900, '192.168.86.1', '239.255.255.250', 50979, 'UDP']
['192.168.86.21', '34.232.24.250', 443, 63058, 'TCP']
['162.125.34.129', '192.168.86.21', 443, 62944, 'TCP']
['192.168.86.21', 443, 63070, '64.4.54.254', 'TCP']
[17500,

In [12]:
bi_directional_sessions

{'Ethernet type = 86dd': <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 'Other': <PacketList: TCP:0 UDP:0 ICMP:0 Other:5>,
 "['100.26.125.169', '192.168.86.21', 443, 60700, 'TCP']": <PacketList: TCP:4 UDP:0 ICMP:0 Other:0>,
 "['104.115.156.95', '192.168.86.21', 443, 61432, 'TCP']": <PacketList: TCP:4 UDP:0 ICMP:0 Other:0>,
 "['162.125.34.129', '192.168.86.21', 443, 62944, 'TCP']": <PacketList: TCP:6 UDP:0 ICMP:0 Other:0>,
 "['162.254.195.45', '192.168.86.21', 27018, 52731, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']": <PacketList: TCP:26 UDP:0 ICMP:0 Other:0>,
 "['192.168.86.1', '192.168.86.21', 'ARP']": <PacketList: TCP:0 UDP:0 ICMP:0 Other:2>,
 "['192.168.86.1', '192.168.86.21', 49170, 53, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0 Other:0>,
 "['192.168.86.21', '192.168.86.22', 60604, 8009, 'TCP']": <PacketList: TCP:6 UDP:0 ICMP:0 Other:0>,
 "['192.168.86.21', '224.0.0.251', 5353, 5353, 'UDP']": <PacketList: TCP:0 UDP:2 ICMP:0

In [39]:
bi_directional_sessions[ "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']"]

<PacketList: TCP:26 UDP:0 ICMP:0 Other:0>

In [40]:
# Collect field names from IP/TCP/UDP (These will be columns in DF)

def build_dataframe(packet_list):

    ip_fields = [field.name for field in IP().fields_desc]
    tcp_fields = [field.name for field in TCP().fields_desc]
    udp_fields = [field.name for field in UDP().fields_desc]

    dataframe_fields = ip_fields + ['time'] + tcp_fields + ['size','payload','payload_raw','payload_hex']

    # Create blank DataFrame
    df = pd.DataFrame(columns=dataframe_fields)
    for packet in packet_list[IP]:
        # Field array for each row of DataFrame
        field_values = []
        # Add all IP fields to dataframe
        for field in ip_fields:
            if field == 'options':
                # Retrieving number of options defined in IP Header
                field_values.append(len(packet[IP].fields[field]))
            else:
                field_values.append(packet[IP].fields[field])

        field_values.append(packet.time)

        layer_type = type(packet[IP].payload)
        for field in tcp_fields:
            try:
                if field == 'options':
                    field_values.append(len(packet[layer_type].fields[field]))
                else:
                    field_values.append(packet[layer_type].fields[field])
            except:
                field_values.append(None)
        
        # Append payload
        field_values.append(len(packet))
        field_values.append(len(packet[layer_type].payload))
        field_values.append(packet[layer_type].payload.original)
        field_values.append(binascii.hexlify(packet[layer_type].payload.original))
        # Add row to DF
        df_append = pd.DataFrame([field_values], columns=dataframe_fields)
        df = pd.concat([df, df_append], axis=0)
        
    # Reset Index
    df = df.reset_index()
    # Drop old index column
    df = df.drop(columns="index")
    return df

In [43]:
test_flow = build_dataframe(bi_directional_sessions[ "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']"])

In [44]:
test_flow

Unnamed: 0,version,ihl,tos,len,id,flags,frag,ttl,proto,chksum,...,reserved,flags.1,window,chksum.1,urgptr,options,size,payload,payload_raw,payload_hex
0,4,5,0,64,0,DF,0,64,6,26707,...,0,S,65535,30586,0,8,78,0,b'',b''
1,4,5,0,60,3627,,0,118,6,25644,...,0,SA,60192,25879,0,5,74,0,b'',b''
2,4,5,0,52,0,DF,0,64,6,26719,...,0,A,2052,30327,0,3,66,0,b'',b''
3,4,5,0,591,0,DF,0,64,6,26180,...,0,PA,2052,52879,0,3,605,539,b'\x16\x03\x01\x02\x16\x01\x00\x02\x12\x03\x03...,b'1603010216010002120303e3b1af1de085f3129030cc...
4,4,5,0,52,3685,,0,118,6,25594,...,0,A,240,31538,0,3,66,0,b'',b''
5,4,5,0,208,3686,,0,118,6,25437,...,0,PA,240,19350,0,3,222,156,"b""\x16\x03\x03\x00d\x02\x00\x00`\x03\x03\\G\xe...",b'16030300640200006003035c47e9702d2744fcc2a1cd...
6,4,5,0,52,0,DF,0,64,6,26719,...,0,A,2049,29513,0,3,66,0,b'',b''
7,4,5,0,103,0,DF,0,64,6,26668,...,0,PA,2049,5869,0,3,117,51,b'\x14\x03\x03\x00\x01\x01\x16\x03\x03\x00(\x0...,b'14030300010116030300280000000000000000c09edd...
8,4,5,0,145,0,DF,0,64,6,26626,...,0,PA,2049,20166,0,3,159,93,b'\x17\x03\x03\x00X\x00\x00\x00\x00\x00\x00\x0...,b'170303005800000000000000014f762aafe282b1c071...
9,4,5,0,1193,0,DF,0,64,6,25578,...,0,PA,2049,37302,0,3,1207,1141,b'\x17\x03\x03\x04p\x00\x00\x00\x00\x00\x00\x0...,b'17030304700000000000000002276680756f740ca381...


In [73]:
test_flow["src"]

0      192.168.86.21
1     172.217.14.206
2      192.168.86.21
3      192.168.86.21
4     172.217.14.206
5     172.217.14.206
6      192.168.86.21
7      192.168.86.21
8      192.168.86.21
9      192.168.86.21
10     192.168.86.21
11    172.217.14.206
12     192.168.86.21
13     192.168.86.21
14    172.217.14.206
15     192.168.86.21
16    172.217.14.206
17    172.217.14.206
18    172.217.14.206
19    172.217.14.206
20    172.217.14.206
21     192.168.86.21
22     192.168.86.21
23     192.168.86.21
24     192.168.86.21
25    172.217.14.206
Name: src, dtype: object

In [46]:
test_flow.columns

Index(['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto',
       'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq',
       'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr',
       'options', 'size', 'payload', 'payload_raw', 'payload_hex'],
      dtype='object')

In [47]:
def get_src_ip(df):
    return df["src"].unique().tolist()[0]

def get_dst_ip(df):
    return df["src"].unique().tolist()[1]

In [48]:
def get_flow_duration(df):
    idx = df.columns.get_loc("time")
    return 1000000 * (df.iloc[-1, idx] - test_flow.iloc[0,idx])

In [49]:
get_flow_duration(test_flow)

563146.8296051025

In [52]:
def get_total_len_forward_packets(df):
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return src_df["payload"].sum()

get_total_len_forward_packets(test_flow)
    
#len(bi_directional_sessions[ "['100.26.125.169', '192.168.86.21', 443, 60700, 'TCP']"][1])

2076

In [53]:
def get_total_len_backward_packets(df):
    bwd = get_dst_ip(df)
    bwd_df = df.loc[df["src"]==bwd]
    return bwd_df["payload"].sum()

get_total_len_backward_packets(test_flow)

1629

In [77]:
def get_total_forward_packets(df):
    
    """
    This function calculates the total number of packets that
    originated from the source IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    return  df.loc[df['src']==src].shape[0]

    
def get_total_backward_packets(df):
    
    """
    This function calculates the total number of packets that
    originated from the destination IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    return  df.loc[df['src']==src].shape[0]
    
print(get_total_forward_packets(test_flow))
print(get_total_backward_packets(test_flow))


15
11


In [83]:
def get_min_forward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the source IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  min(src_df["payload"])

def get_min_backward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the destination IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  min(src_df["payload"])

get_min_forward_packet_size(test_flow)
get_min_backward_packet_size(test_flow)

0

In [85]:
def get_max_forward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the source IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  max(src_df["payload"])

def get_max_backward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the destination IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  max(src_df["payload"])

print(get_max_forward_packet_size(test_flow))
print(get_max_backward_packet_size(test_flow))

1141
494


In [86]:
def get_mean_forward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the source IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["payload"].mean()

def get_mean_backward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the destination IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["payload"].mean()

print(get_mean_forward_packet_size(test_flow))
print(get_mean_backward_packet_size(test_flow))

138.4
148.0909090909091


In [87]:
def get_std_forward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the source IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["payload"].std()

def get_std_backward_packet_size(df):
    
    """
    This function calculates the minimum payload size that
    originated from the destination IP address
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["payload"].std()

print(get_std_forward_packet_size(test_flow))
print(get_std_backward_packet_size(test_flow))

310.31292961414664
203.42490238191317


In [99]:
def get_iat_forward_total_time(df):
    
    """
    This function calculates the total inter arrival 
    time (iat) of packets from the source IP address.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """

    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["time"].diff().sum() * 1000000

def get_iat_backward_total_time(df):
    
    """
    This function calculates the total inter arrival 
    time (iat) of packets from the destination IP address.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """

    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["time"].diff().sum() * 1000000

print(get_iat_forward_total_time(test_flow))
print(get_iat_backward_total_time(test_flow))
print(get_iat_forward_total_time(test_flow) - get_iat_backward_total_time(test_flow))

514256.95419311523
506584.88273620605
7672.07145690918


In [115]:
a = get_src_ip(test_flow)
b = test_flow.loc[test_flow["src"]==a]
c = b["time"].tolist()
d = []
for i in range(len(c)):
    if i+1 == len(c):
        break
    else:
        y = c[i+1] - c[i]
        #print(y)
        d.append(y)
print(d)
print(sum(d))


[0.05760002136230469, 0.0007069110870361328, 0.06265807151794434, 0.0016188621520996094, 0.0017380714416503906, 0.0017518997192382812, 0.0009009838104248047, 0.0396881103515625, 0.001589059829711914, 0.0028569698333740234, 0.34024786949157715, 0.0008981227874755859, 0.001035928726196289, 0.0009660720825195312]
0.5142569541931152


In [116]:
def get_src_times(df):
    
    """
    This function returns the "time" Series object 
    from the passed in dataframe for the session
    source.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_src_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["time"]

def get_dst_times(df):
    
    """
    This function returns the "time" Series object 
    from the passed in dataframe for the session
    destination.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df.loc[df["src"]==src]
    return  src_df["time"]

print(get_src_times(test_flow))
print(get_dst_times(test_flow))

0     1.548217e+09
2     1.548217e+09
3     1.548217e+09
6     1.548217e+09
7     1.548217e+09
8     1.548217e+09
9     1.548217e+09
10    1.548217e+09
12    1.548217e+09
13    1.548217e+09
15    1.548217e+09
21    1.548217e+09
22    1.548217e+09
23    1.548217e+09
24    1.548217e+09
Name: time, dtype: float64
1     1.548217e+09
4     1.548217e+09
5     1.548217e+09
11    1.548217e+09
14    1.548217e+09
16    1.548217e+09
17    1.548217e+09
18    1.548217e+09
19    1.548217e+09
20    1.548217e+09
25    1.548217e+09
Name: time, dtype: float64


In [125]:
def get_iat_forward_min_times(df):
    
    """
    This function returns the minimum inter arrival
    time (IAT) between packets from the source.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_src_times(df)
    return  min(src_times.diff().dropna()) * 1000000

def get_iat_backwards_min_times(df):
    
    """
    This function returns the minimum inter arrival
    time (IAT) between packets from the destination.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_dst_times(df)
    return  min(src_times.diff().dropna()) * 1000000

print(get_iat_forward_min_times(test_flow))
print(get_iat_backwards_min_times(test_flow))

706.9110870361328
1478.9104461669922


In [126]:
def get_iat_forward_max_times(df):
    
    """
    This function returns the maximum inter arrival
    time (IAT) between packets from the source.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_src_times(df)
    return  max(src_times.diff().dropna()) * 1000000

def get_iat_backwards_max_times(df):
    
    """
    This function returns the maximum inter arrival
    time (IAT) between packets from the destination.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_dst_times(df)
    return  max(src_times.diff().dropna()) * 1000000

print(get_iat_forward_max_times(test_flow))
print(get_iat_backwards_max_times(test_flow))

340247.86949157715
250015.0203704834


In [127]:
def get_iat_forward_mean_times(df):
    
    """
    This function returns the mean inter arrival
    time (IAT) between packets from the source.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_src_times(df)
    return  src_times.diff().dropna().mean() * 1000000

def get_iat_backwards_mean_times(df):
    
    """
    This function returns the mean inter arrival
    time (IAT) between packets from the destination.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_dst_times(df)
    return  src_times.diff().dropna().mean() * 1000000

print(get_iat_forward_mean_times(test_flow))
print(get_iat_backwards_mean_times(test_flow))

36732.63958522251
50658.488273620605


In [128]:
def get_iat_forward_std_times(df):
    
    """
    This function returns the standard deviation for inter arrival
    time (IAT) between packets from the source.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_src_times(df)
    return  src_times.diff().dropna().std() * 1000000

def get_iat_backwards_std_times(df):
    
    """
    This function returns the standard deviation inter arrival
    time (IAT) between packets from the destination.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src_times = get_dst_times(df)
    return  src_times.diff().dropna().std() * 1000000

print(get_iat_forward_std_times(test_flow))
print(get_iat_backwards_std_times(test_flow))

90179.76173356772
76004.98265222843


In [129]:
test_flow.columns

Index(['version', 'ihl', 'tos', 'len', 'id', 'flags', 'frag', 'ttl', 'proto',
       'chksum', 'src', 'dst', 'options', 'time', 'sport', 'dport', 'seq',
       'ack', 'dataofs', 'reserved', 'flags', 'window', 'chksum', 'urgptr',
       'options', 'size', 'payload', 'payload_raw', 'payload_hex'],
      dtype='object')

# Need to find flag informatio now

In [None]:
test_flow[["flags", "src", "options", 'size']]

flags = {
    'F': 'FIN',
    'S': 'SYN',
    'R': 'RST',
    'P': 'PSH',
    'A': 'ACK',
    'U': 'URG',
    'E': 'ECE',
    'C': 'CWR',
}


In [136]:
test_flow = build_dataframe(bi_directional_sessions[ "['172.217.14.206', '192.168.86.21', 443, 63069, 'TCP']"])
test_flow = build_dataframe(bi_directional_sessions[ "['192.168.86.21', 443, 63070, '64.4.54.254', 'TCP']"])

In [139]:
test_flow[["flags", "src"]]

Unnamed: 0,flags,flags.1,src
0,DF,S,192.168.86.21
1,DF,SA,64.4.54.254
2,DF,A,192.168.86.21
3,DF,PA,192.168.86.21
4,DF,A,64.4.54.254
5,DF,A,64.4.54.254
6,DF,PA,64.4.54.254
7,DF,A,192.168.86.21
8,DF,A,192.168.86.21
9,DF,A,192.168.86.21


In [160]:
def remove_duplicate_flags_col(df):
    
    """
    This function removes the first occurence
    of the 'flags' column due to multiple
    columns named 'flags'
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    column_numbers = [x for x in range(df.shape[1])]
    column_numbers.remove(5)
    return df.iloc[:, column_numbers]

In [229]:
def decode_flags(df):
    
    """
    This function decodes the bitwise flag
    into a string.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
        
    """
    
    return df["flags"].apply(lambda x: str(x))

def count_flags(df, ip, flag):
    
    """
    This function counts the total number of
    flags from the specified origin.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
        ip (String): A string representation of the IP address
        flag (String): The first letter of the flag to search.
    """
    
    df = df.loc[df["src"]==ip]
    df["flags"] = decode_flags(df).str.contains(flag)
    return df[df["flags"] == True].shape[0]
    

In [238]:
def get_total_forward_push_flags(df):
    
    """
    This function calculates the total number of
    push flags in the forward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    
    df = remove_duplicate_flags_col(df)
    src = get_src_ip(df)
    return count_flags(df, src, "P")

def get_total_backward_push_flags(df):
    
    """
    This function calculates the total number of
    push flags in the forward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    
    df = remove_duplicate_flags_col(df)
    src = get_dst_ip(df)
    return count_flags(df, src, "P")

print(get_total_forward_push_flags(test_flow))
print(get_total_backward_push_flags(test_flow))

4
3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [242]:
test_flow[["flags","src","sport"]]

Unnamed: 0,flags,flags.1,src,sport
0,DF,S,192.168.86.21,63070
1,DF,SA,64.4.54.254,443
2,DF,A,192.168.86.21,63070
3,DF,PA,192.168.86.21,63070
4,DF,A,64.4.54.254,443
5,DF,A,64.4.54.254,443
6,DF,PA,64.4.54.254,443
7,DF,A,192.168.86.21,63070
8,DF,A,192.168.86.21,63070
9,DF,A,192.168.86.21,63070


In [239]:
def get_total_forward_urgent_flags(df):
    
    """
    This function calculates the total number of
    urgent flags in the forward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    
    df = remove_duplicate_flags_col(df)
    src = get_src_ip(df)
    return count_flags(df, src, "U")

def get_total_backward_urgent_flags(df):
    
    """
    This function calculates the total number of
    urgent flags in the forward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    
    df = remove_duplicate_flags_col(df)
    src = get_dst_ip(df)
    return count_flags(df, src, "U")

print(get_total_forward_urgent_flags(test_flow))
print(get_total_backward_urgent_flags(test_flow))

0
0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [252]:
def get_total_header_len_forward_packets(df):
    
    """
    This function calculates the total size
    of headers in the forward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
        
    src = get_src_ip(df)
    src_df = df[df["src"]==src]
    return src_df["size"].sum() - get_total_len_forward_packets(df)

def get_total_header_len_backward_packets(df):
    
    """
    This function calculates the total size
    of headers in the backward direction.
        
    Args:
        df (Dataframe): A bi-directional flow pandas dataframe.
    """
    
    src = get_dst_ip(df)
    src_df = df[df["src"]==src]
    return src_df["size"].sum() - get_total_len_forward_packets(df)


print(get_total_header_len_forward_packets(test_flow))
print(get_total_header_len_backward_packets(test_flow))

804
3294


In [250]:
get_total_forward_packets(test_flow)
get_total_len_forward_packets(test_flow)

1208