# CSCI E-63 Final Project - Network Monitoring Analytics
Author: Joshua Langford

This project expands upon existing research conducted by Mouad Lasri (GitHub: https://github.com/mouadlasri/datapipeline-TCP-UDP-neo4j).


In [1]:
# Installing python packages to parse PCAP files and interface with Neo4j.
import sys
!{sys.executable} -m pip install py2neo
!{sys.executable} -m pip install scapy



In [2]:
# Import scapy to parse the PCAP file into packets, py2neo to generate graphs
# within Neo4j, and pandas to load the port lookup tables
from scapy.all import *
from py2neo import Graph, Node, Relationship
import pandas as pd

In [3]:
# Read the capture file "traffic.pcap" with rdpcap (available through the scapy star
# import above) and keep the result in `packets`. The relative path means the file is
# looked up from the notebook's current working directory.
packets = rdpcap("traffic.pcap")

In [4]:
# Load the TCP and UDP port-to-service lookup tables used to guess which service a
# session belongs to. Each CSV must provide a 'port' and a 'description' column.
tcp_ports = pd.read_csv("tcp_ports.csv")
udp_ports = pd.read_csv("udp_ports.csv")
# Build {port: description} straight from the two columns instead of walking the
# frames row by row with iterrows(). If a port is listed more than once, the last
# row wins, exactly as with the row-by-row assignment.
tcp_port_dict = dict(zip(tcp_ports["port"], tcp_ports["description"]))
udp_port_dict = dict(zip(udp_ports["port"], udp_ports["description"]))

In [5]:
# Group the captured packets into sessions with packets.sessions().
# The loop further down iterates the result with .items(): each key is a text
# description of the session (split on whitespace there) and each value is the
# sequence of packets belonging to it. Those sessions are then written to Neo4j
# as relationships between hosts.
sessions = packets.sessions()

In [6]:
# Describe the fields in each of the packet layers.
# fields_desc is read from the layer classes rather than from packets[30] and
# packets[0], so this cell no longer fails when the capture has fewer than 31 packets
# or when those particular packets do not carry an IP/TCP/UDP layer.
from scapy.all import Ether, IP, TCP, UDP

print("Eth fields: ", Ether.fields_desc, "\n")
print("IP fields: ", IP.fields_desc, "\n")
print("TCP fields: ", TCP.fields_desc, "\n")
print("UDP fields: ", UDP.fields_desc, "\n")

Eth fields:  [<DestMACField (Ether).dst>, <SourceMACField (Ether).src>, <XShortEnumField (Ether).type>] 

IP fields:  [<BitField (IP,IPerror,IPv46).version>, <BitField (IP,IPerror,IPv46).ihl>, <XByteField (IP,IPerror,IPv46).tos>, <ShortField (IP,IPerror,IPv46).len>, <ShortField (IP,IPerror,IPv46).id>, <FlagsField (IP,IPerror,IPv46).flags>, <BitField (IP,IPerror,IPv46).frag>, <ByteField (IP,IPerror,IPv46).ttl>, <ByteEnumField (IP,IPerror,IPv46).proto>, <XShortField (IP,IPerror,IPv46).chksum>, <scapy.fields.Emph object at 0x7f3e6feb1e80>, <scapy.fields.Emph object at 0x7f3e6feb1f80>, <PacketListField (IP,IPerror,IPv46).options>] 

TCP fields:  [<ShortEnumField (TCP,TCPerror).sport>, <ShortEnumField (TCP,TCPerror).dport>, <IntField (TCP,TCPerror).seq>, <IntField (TCP,TCPerror).ack>, <BitField (TCP,TCPerror).dataofs>, <BitField (TCP,TCPerror).reserved>, <FlagsField (TCP,TCPerror).flags>, <ShortField (TCP,TCPerror).window>, <XShortField (TCP,TCPerror).chksum>, <ShortField (TCP,TCPerror).urg

In [7]:
# Create a handle to the Neo4j server so that nodes and relationships built in Python
# can be passed to the database.
# The URI, user and password can be overridden with the NEO4J_URI, NEO4J_USER and
# NEO4J_PASSWORD environment variables; the previous hard-coded values remain as
# fallbacks so the notebook behaves the same when none of them is set.
# NOTE(review): the fallback password is committed together with the notebook --
# rotate it and drop the literal once the environment variable is in place.
import os

neo4jGraph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "#4Sa7YA:n304j"),
    ),
)

In [8]:
# Turn every scapy session into a Neo4j relationship between two "Host" nodes.
#
# The session key is split on whitespace: element 0 names the protocol, elements 1
# and 3 are the two endpoints. Keys are assumed to look like
# "TCP 10.0.0.1:443 > 10.0.0.2:51000", "ARP 10.0.0.1 > 10.0.0.2" or
# "IP 10.0.0.1 > 10.0.0.2 proto=udp" -- TODO confirm against the scapy version in use.
# Sessions whose protocol is none of TCP / UDP / ARP / IP are not written to the graph.
from datetime import datetime, timezone

# Service label stored when neither port is found in the lookup table. The spelling
# is reproduced exactly as before so that new relationships stay consistent with
# values that may already be stored or queried.
UNKNOWN_SERVICE = "Unkown"


def _neo4j_timestamp(packet_time):
    """Return packet_time (epoch seconds) as a UTC 'YYYY-MM-DDTHH:MM:SSZ' string.

    The value is truncated to whole seconds with an explicit int(), which yields the
    same whole-second strings as the timestamps recorded in the graph so far. The
    explicit conversion avoids handing a non-float time object to datetime directly
    (scapy packet times may be Decimal-like -- TODO confirm), and the timezone-aware
    constructor replaces the deprecated datetime.utcfromtimestamp().
    """
    moment = datetime.fromtimestamp(int(packet_time), tz=timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")


def _split_endpoint(endpoint):
    """Split 'address:port' on the LAST colon and return (address, port as int).

    Splitting on the last colon keeps IPv6 addresses, which contain colons
    themselves, in one piece.
    """
    address, _, port = endpoint.rpartition(":")
    return address, int(port)


def _ip_version(packet):
    """Return "IPv4" if packet has an IP layer whose version field is 4, else "IPv6".

    getlayer('IP') returns None for packets without an IPv4 layer, so the result is
    checked before its version is read.
    """
    ip_layer = packet.getlayer('IP')
    if ip_layer is not None and ip_layer.version == 4:
        return "IPv4"
    return "IPv6"


def _guess_service(port_table, src_port, dst_port):
    """Look up the source port first, then the destination port, in port_table."""
    if src_port in port_table:
        return port_table[src_port]
    return port_table.get(dst_port, UNKNOWN_SERVICE)


def _merge_session(rel_type, src_host, dst_host, **properties):
    """Build a rel_type relationship between two Host nodes and merge it into Neo4j.

    The merge is issued with the "Host" label and the "name" property, as before.
    """
    relationship = Relationship(
        Node("Host", name=src_host), rel_type, Node("Host", name=dst_host), **properties
    )
    neo4jGraph.merge(relationship, "Host", "name")


for key, value in sessions.items():
    # Split the session key to parse the key session characteristics
    key_split = key.split()
    session_kind = key_split[0]

    # Metadata shared by every relationship type: total bytes, duration in seconds,
    # and the timestamps of the first and last packet of the session
    session_props = {
        "session_size": sum(len(p) for p in value),
        "duration": float(value[-1].time - value[0].time),
        "session_start": _neo4j_timestamp(value[0].time),
        "session_end": _neo4j_timestamp(value[-1].time),
    }

    if "TCP" in session_kind or "UDP" in session_kind:
        # TCP and UDP sessions carry ports, so a service can be guessed from the
        # matching lookup table
        if "TCP" in session_kind:
            rel_type, port_table = "TCP", tcp_port_dict
        else:
            rel_type, port_table = "UDP", udp_port_dict
        src_host, src_port = _split_endpoint(key_split[1])
        dst_host, dst_port = _split_endpoint(key_split[3])
        _merge_session(
            rel_type,
            src_host,
            dst_host,
            src_port=src_port,
            dst_port=dst_port,
            # The last packet of the session serves as the sample packet
            ip_version=_ip_version(value[-1]),
            service=_guess_service(port_table, src_port, dst_port),
            **session_props,
        )
    elif "ARP" in session_kind:
        _merge_session("ARP", key_split[1], key_split[3], **session_props)
    elif "IP" in session_kind:
        # Port-less IP session: the text after the first "=" names the carried
        # protocol. partition() never raises, even when that text contains spaces.
        carried_protocol = key.partition("=")[2]
        if "udp" in carried_protocol:
            _merge_session("UDP", key_split[1], key_split[3], **session_props)
        elif "tcp" in carried_protocol:
            _merge_session("TCP", key_split[1], key_split[3], **session_props)

  start_time = str(datetime.utcfromtimestamp(value[0].time))
  end_time = str(datetime.utcfromtimestamp(value[-1].time))


In [9]:
# Rewrite session_size, dst_port and src_port on every relationship through Cypher's
# toInteger() so they can be used in numeric queries, then return the relationships.
cast_query = (
    "MATCH () - [r] - () "
    "SET r += {session_size: toInteger(r.session_size), "
    "dst_port: toInteger(r.dst_port), "
    "src_port: toInteger(r.src_port)} "
    "RETURN r"
)
neo4jGraph.run(cast_query)

r
"(192.168.86.42)-[:ARP {duration: 0.0, session_end: '2022-05-12T01:17:41Z', session_size: 60, session_start: '2022-05-12T01:17:41Z'}]->(192.168.86.77)"
"(192.168.86.77)-[:ARP {duration: 0.0, session_end: '2022-05-12T01:17:41Z', session_size: 42, session_start: '2022-05-12T01:17:41Z'}]->(192.168.86.42)"
"(192.168.86.77)-[:TCP {dst_port: 443, duration: 30.010245, ip_version: 'IPv4', service: 'HTTP protocol over TLS/SSL', session_end: '2022-05-12T01:18:31Z', session_size: 5564, session_start: '2022-05-12T01:18:01Z', src_port: 49903}]->(192.168.86.42)"
