# Network data exploration

## Load data

Gather the data paths from a private JSON file.

In [None]:
import json


# Read the private path configuration. JSON text is UTF-8 by specification, so
# the encoding is given explicitly instead of relying on the platform default.
with open('paths.json', encoding='utf-8') as f:
    json_data = json.load(f)
# Trailing expression: the notebook displays the loaded configuration.
json_data

In [None]:
# Path of the capture file, stored under the nested keys 'data' -> 'file'.
networkfile = json_data['data']['file']

Read network data from file

In [None]:
import scapy.all


# Load the capture from disk, then let scapy group its packets into sessions.
a = scapy.all.rdpcap(networkfile)
sessions = a.sessions()

print(f'Loaded {len(sessions)} sessions')

In [None]:
# Materialise keys and values as lists so they can be indexed by position,
# then preview the first two entries of each.
sessionKeys = list(sessions.keys())
sessionVals = list(sessions.values())
print(sessionKeys[:2])
print(sessionVals[:2])

## Investigate pcap data

### Investigate single packet list

In [None]:
# Print scapy's own rendering of the first session's packet list.
sessionVals[0].show()

In [None]:
# Tabulate the first session; the lambda returns one 3-tuple per packet.
# NOTE(review): the first element reads ``dport`` through the IP layer, the same
# field name as the TCP element beside it -- possibly ``dst`` was intended; confirm.
sessionVals[0].make_table(lambda x:(x[scapy.all.IP].dport, x[scapy.all.TCP].dport, x[scapy.all.TCP].payload_guess))

In [None]:
# Display the IP layer of the first packet of the first session.
sessionVals[0][0][scapy.all.IP]

In [None]:
# Display the TCP layer of the first packet of the first session.
sessionVals[0][0][scapy.all.TCP]

### Fill data in dataframe

Define extractors that each read one field from every packet of a single session

In [None]:
import numpy as np


# Tally of swallowed extraction errors: "<ExceptionName>: <message>" -> count.
errors = {}


def catch_exception(func):
    """Make a per-session extractor tolerant of missing layers or fields.

    The wrapped function is called unchanged. If it raises ``IndexError`` or
    ``AttributeError``, the error is counted in the module-level ``errors``
    dict and a list of NaNs is returned instead, one per element of the first
    positional argument, so the result keeps the expected length.

    Args:
        func: Callable whose first positional argument supports ``len()``.

    Returns:
        The wrapping function, carrying ``func``'s name and docstring.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(func)
    def func_wrapper(*args, **kargs):
        try:
            return func(*args, **kargs)
        except (IndexError, AttributeError) as e:
            # str(e) copes with zero or several exception args; unpacking
            # e.args into str() would raise TypeError for more than one.
            err = e.__class__.__name__ + ': ' + str(e)
            errors[err] = errors.get(err, 0) + 1
            # np.nan is the spelling that exists in every NumPy release
            # (the np.NaN alias was removed in NumPy 2.0).
            return [np.nan] * len(args[0])
    return func_wrapper

In [None]:
# define Ethernet functions
@catch_exception
def extract_source_MAC(session):
    """Return the ``src`` field of the Ether layer for every packet in ``session``."""
    return [packet[scapy.all.Ether].src for packet in session]

@catch_exception
def extract_destination_MAC(session):
    """Return the ``dst`` field of the Ether layer for every packet in ``session``."""
    return [packet[scapy.all.Ether].dst for packet in session]

# define IP functions

@catch_exception
def extract_IP_flags(session):
    """Return the IP layer's ``%flags%`` rendering for every packet in ``session``."""
    return [packet[scapy.all.IP].sprintf("%flags%") for packet in session]

@catch_exception
def extract_IP_time(session):
    """Return the ``time`` attribute read through the IP layer of every packet."""
    return [packet[scapy.all.IP].time for packet in session]

@catch_exception
def extract_source_IPs(session):
    """Return the ``src`` field of the IP layer for every packet in ``session``."""
    return [packet[scapy.all.IP].src for packet in session]

@catch_exception
def extract_destination_IPs(session):
    """Return the ``dst`` field of the IP layer for every packet in ``session``."""
    return [packet[scapy.all.IP].dst for packet in session]

#@catch_exception
#def extract_destination_IP_ports(session):
#    return list(map(lambda x: x[scapy.all.IP].dport, session))

#@catch_exception
#def extract_source_IP_ports(session):
#    return list(map(lambda x: x[scapy.all.IP].sport, session))


# define UDP functions

@catch_exception
def extract_destination_UDP_ports(session):
    """Return the ``dport`` field of the UDP layer for every packet in ``session``."""
    return [packet[scapy.all.UDP].dport for packet in session]

@catch_exception
def extract_source_UDP_ports(session):
    """Return the ``sport`` field of the UDP layer for every packet in ``session``."""
    return [packet[scapy.all.UDP].sport for packet in session]

@catch_exception
def extract_UDP_flags(session):
    """Return the UDP layer's ``%flags%`` rendering for every packet in ``session``.

    NOTE(review): a UDP header has no flags field of its own, so it is unclear
    what this format string yields here -- confirm the column is meaningful.
    """
    return [packet[scapy.all.UDP].sprintf("%flags%") for packet in session]


# define TCP functions

@catch_exception
def extract_TCP_flags(session):
    """Return the TCP layer's ``%flags%`` rendering for every packet in ``session``."""
    return [packet[scapy.all.TCP].sprintf("%flags%") for packet in session]

@catch_exception
def extract_destination_TCP_ports(session):
    """Return the ``dport`` field of the TCP layer for every packet in ``session``."""
    return [packet[scapy.all.TCP].dport for packet in session]

@catch_exception
def extract_source_TCP_ports(session):
    """Return the ``sport`` field of the TCP layer for every packet in ``session``."""
    return [packet[scapy.all.TCP].sport for packet in session]

@catch_exception
def extract_TCP_seq(session):
    """Return the ``seq`` field of the TCP layer for every packet in ``session``."""
    return [packet[scapy.all.TCP].seq for packet in session]

@catch_exception
def extract_TCP_ack(session):
    """Return the ``ack`` field of the TCP layer for every packet in ``session``."""
    return [packet[scapy.all.TCP].ack for packet in session]

# bookkeeping

def extract_session_ID(sessions):
    """Return, for every packet of every session, the index of its session.

    The result is flat: session 0's index repeated once per packet, then
    session 1's, and so on, in the order ``sessions`` is iterated.
    """
    return [
        position
        for position, session in enumerate(sessions)
        for _ in range(len(session))
    ]

def extract_packet_IDs(session):
    """Return the positions ``0 .. len(session) - 1`` as a list."""
    return [*range(len(session))]

Perform extraction for all packets

In [None]:
import itertools


def flatten_map(func, iterable):
    """Apply ``func`` to each item of ``iterable`` and concatenate the results.

    Args:
        func: Callable returning an iterable for each item.
        iterable: Items to feed to ``func``.

    Returns:
        One flat list holding the elements of every result, in order.
    """
    # chain.from_iterable consumes the mapped results directly, so no
    # intermediate list is built and nothing is unpacked into call arguments.
    return list(itertools.chain.from_iterable(map(func, iterable)))

def extract_data(sessions):
    """Collect every extracted field of ``sessions`` into one dict of columns.

    Each column is a flat list with one entry per packet, produced by running
    an extractor over every session and concatenating the results with
    ``flatten_map``; ``session_ID`` comes from ``extract_session_ID``.
    The number of sessions is printed first.

    Args:
        sessions: Sequence of sessions, each a sequence of packets.

    Returns:
        Dict mapping column name to its list of per-packet values.
    """
    print(f'This session list has {len(sessions)} sessions')

    # Extractors are listed in the order they are run.
    before_session_ids = (
        ('source_IP', extract_source_IPs),
        ('destination_IP', extract_destination_IPs),
        ('source_TCP_port', extract_source_TCP_ports),
        ('destination_TCP_port', extract_destination_TCP_ports),
        ('TCP_flags', extract_TCP_flags),
        ('IP_flags', extract_IP_flags),
        ('source_UDP_port', extract_source_UDP_ports),
        ('destination_UDP_port', extract_destination_UDP_ports),
        ('IP_time', extract_IP_time),
        ('packet_ID', extract_packet_IDs),
    )
    after_session_ids = (
        ('UDP_flags', extract_UDP_flags),
        ('source_MAC', extract_source_MAC),
        ('destination_MAC', extract_destination_MAC),
        ('TCP_seq', extract_TCP_seq),
        ('TCP_ack', extract_TCP_ack),
    )

    extracted = {}
    for column, extractor in before_session_ids:
        extracted[column] = flatten_map(extractor, sessions)
    extracted['session_ID'] = extract_session_ID(sessions)
    for column, extractor in after_session_ids:
        extracted[column] = flatten_map(extractor, sessions)

    # Key order of the returned dict (bookkeeping columns first).
    column_order = (
        'session_ID',
        'packet_ID',
        'IP_time',
        'source_IP',
        'destination_IP',
        'source_TCP_port',
        'destination_TCP_port',
        'IP_flags',
        'TCP_flags',
        'UDP_flags',
        'source_UDP_port',
        'destination_UDP_port',
        'source_MAC',
        'destination_MAC',
        'TCP_seq',
        'TCP_ack',
    )
    return {column: extracted[column] for column in column_order}

In [None]:
# Extract all sessions; the ``[0:]`` slice passes a copy of the full list and can
# be narrowed to process only a subset.
data = extract_data(sessionVals[0:])
# Show which extraction errors were swallowed by catch_exception, and how often.
print(errors)

In [None]:
import pandas as pd

# One row per packet, one column per extracted field.
df = pd.DataFrame(data)
# Display the distinct values found in the IP_flags column.
df['IP_flags'].unique()

In [None]:
# Display two specific rows by index label.
# NOTE(review): the labels are hard-coded for one particular capture; presumably
# this fails on a dataset with fewer rows -- confirm before reusing.
df.loc[[159220, 159221]]

In [None]:
# Display the rows whose index labels fall in the slice 0:100.
df.loc[0:100]

In [None]:
# Print pandas' summary of the DataFrame.
df.info()

In [None]:
# Display the packets ordered by IP_time; the result is not assigned, so ``df``
# itself keeps its original order.
df.sort_values(by=['IP_time'])

### Plot data

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart counting the rows of ``df`` for each distinct TCP_flags value,
# drawn on a newly created 20x15 inch figure.
plt.subplots(figsize=(20,15))
g=sns.countplot(x="TCP_flags", data=df)
#g.set_xticklabels(g.get_xticklabels(), rotation=0)
print('Plot it...')


In [None]:
def cat_corr(df, x, y):
    """Count how often each pair of values from columns ``x`` and ``y`` co-occurs.

    Args:
        df: DataFrame containing both columns.
        x: Name of the column whose distinct values index the rows.
        y: Name of the column whose distinct values index the columns.

    Returns:
        A list of lists ``cat`` where ``cat[i][j]`` is the number of rows of
        ``df`` with the i-th value of ``df[x].unique()`` and the j-th value of
        ``df[y].unique()``. Pairs that never occur are 0.
    """
    row_labels = df[x].unique()
    col_labels = df[y].unique()
    # One grouping pass gives {(x_value, y_value): row count}; a plain dict
    # lookup then replaces building an intermediate Series for every cell.
    pair_counts = df.groupby([x, y]).size().to_dict()
    return [
        [pair_counts.get((row, col), 0) for col in col_labels]
        for row in row_labels
    ]

In [None]:
# Heatmap of row counts per (source_IP, TCP_flags) pair; rows missing either
# value are dropped first.
df_dropna = df[['source_IP', 'TCP_flags']].dropna()
plt.subplots(figsize=(20, 15))
sns.heatmap(
    cat_corr(df_dropna, 'source_IP', 'TCP_flags'),
    yticklabels=df_dropna['source_IP'].unique(),
    xticklabels=df_dropna['TCP_flags'].unique(),
)

In [None]:
# Scatter source_IP against source_MAC, using only rows that have both values.
df_dropna = df[['source_IP', 'source_MAC']].dropna()
#df_dropna = df.loc[df['source_MAC'] == "9c:c7:a6:6c:5b:b5"]
#print(df_dropna['source_IP'].unique())
sns.relplot(x="source_IP", y="source_MAC", data=df_dropna)
# Rotate the x tick labels so they stay readable.
plt.xticks(rotation=90)
# Enlarge the current figure after plotting.
g = plt.gcf()
g.set_size_inches(20, 15)

In [None]:
# Scatter destination_IP against destination_MAC, using only rows that have both values.
df_dropna = df[['destination_IP', 'destination_MAC']].dropna()
sns.relplot(x="destination_IP", y="destination_MAC", data=df_dropna)
# Rotate the x tick labels so they stay readable.
plt.xticks(rotation=90)
# Enlarge the current figure after plotting.
g = plt.gcf()
g.set_size_inches(20, 15)