In [32]:
import matplotlib
import re
import os
import sys
import time
from datetime import datetime

##Current Step: Start plotting connection indexes by IP Header Size
##Plot connections that use the most bandwidth -- using IP Header averages. (Top 10 for game connections maybe?)
##Plot connections that exist for the longest duration?! -- That makes the most sense since things like HoN have long durations
#### with tiny packet sizes in general.

In [33]:
# Directory holding the raw capture text files, and the directory that receives
# the per-connection split output.
capture_src = 'captures'
capture_dst = 'captures_split'
# NOTE(review): not referenced by any visible cell -- presumably a log of
# captures that were already processed; confirm before relying on it.
processed = 'processed_log.txt'

# Predefined time format string used in tcpdump captures.
time_format_string = '%Y-%m-%d %H:%M:%S.%f'

# One packet spans two consecutive capture lines; the pattern is applied to the
# two lines joined together. Adjacent raw-string pieces concatenate into a
# single pattern.
packet_match_regex = (
    r'(?P<packetTime>\d+-\d+-\d+\W\d+:\d+:\d+\.\d+)'  # line 1: timestamp
    r'\WIP.*proto\W(?P<proto>\w+)'                    # line 1: protocol name
    r'.*length\W(?P<ipLength>\d+)'                    # line 1: IP length
    r'.*\r?\n\W*'                                     # line break + indentation
    r'(?P<srcHost>[\w\.\-]+)\.(?P<srcPort>[\w]+)'     # line 2: source host.port
    r'\W\>\W'                                         # line 2: " > " separator
    r'(?P<dstHost>[\w\.\-]+)\.(?P<dstPort>[\w]+)'     # line 2: destination host.port
    r'.*length\W(?P<tcpLength>\d+).*'                 # line 2: payload length
)

In [34]:
def get_connection_string(packet):
    """Return the forward connection identifier for ``packet``.

    The identifier has the layout ``src_host:src_port,dst_host:dst_port,proto``
    and is built from the packet's attributes of the same names.
    """
    forward_fields = (
        packet.src_host,
        packet.src_port,
        packet.dst_host,
        packet.dst_port,
        packet.proto,
    )
    return "%s:%s,%s:%s,%s" % forward_fields
def get_connection_string_reversed(packet):
    """Return the connection identifier for ``packet`` with the endpoints swapped.

    The layout is ``dst_host:dst_port,src_host:src_port,proto``, i.e. the
    identifier a packet travelling in the opposite direction would produce.
    """
    reversed_fields = (
        packet.dst_host,
        packet.dst_port,
        packet.src_host,
        packet.src_port,
        packet.proto,
    )
    return "%s:%s,%s:%s,%s" % reversed_fields
def time_string_to_datetime(time_string):
    """Parse ``time_string`` into a ``datetime`` using ``time_format_string``.

    Raises ``ValueError`` (from ``datetime.strptime``) when the text does not
    follow that format.
    """
    parsed_time = datetime.strptime(time_string, time_format_string)
    return parsed_time
def datetime_to_timestring(datetime_time):
    """Render ``datetime_time`` as text using ``time_format_string``."""
    rendered_time = datetime_time.strftime(time_format_string)
    return rendered_time
class packet_flow:
    """Running statistics for one connection (both directions combined).

    Tracks the connection identifier, the earliest and latest packet
    timestamps seen, and the sum of the packets' ``ip_length`` values.
    """
    # Class-level defaults; __init__ overwrites every one of them per instance.
    connection_string = ""
    first_packet_datetime = None
    last_packet_datetime = None
    total_bytes = 0

    def __init__(self,packet):
        """Start a flow from its first observed ``packet``.

        ``packet`` must expose ``src_host``, ``src_port``, ``dst_host``,
        ``dst_port``, ``proto``, ``packet_time`` and ``ip_length``.
        """
        # Identify the flow by its endpoints and protocol
        # (src_host:src_port,dst_host:dst_port,proto) instead of storing the
        # raw capture text of the first packet under this name.
        self.connection_string = "%s:%s,%s:%s,%s"%(packet.src_host,packet.src_port,packet.dst_host,packet.dst_port,packet.proto)
        self.first_packet_datetime = packet.packet_time
        self.last_packet_datetime = packet.packet_time
        self.total_bytes = packet.ip_length

    def add_packet(self,packet):
        """Fold another packet of this connection into the running totals."""
        self.total_bytes += packet.ip_length
        # Packets may arrive out of order, so widen the window in either direction.
        if packet.packet_time < self.first_packet_datetime:
            self.first_packet_datetime = packet.packet_time
        elif packet.packet_time > self.last_packet_datetime:
            self.last_packet_datetime = packet.packet_time

    def get_flow_duration(self):
        """Return the time between the first and last packet, in seconds."""
        return (self.last_packet_datetime - self.first_packet_datetime).total_seconds()
class connection_map:
    """Assigns a stable integer index to every connection seen in a capture.

    ``connection_list[i]`` is the identifier
    (``src_host:src_port,dst_host:dst_port,proto``) of connection ``i`` and
    ``packet_flow_list[i]`` is the flow object accumulating its statistics.
    Uplink and downlink packets of the same connection share one index.
    """

    def __init__(self):
        self.connection_list = list()
        self.packet_flow_list = list()
        # identifier -> index cache, so each packet costs a dict lookup rather
        # than a linear scan of connection_list.
        self._index_by_connection = dict()
        # Length of connection_list when the cache was last known to be current.
        self._indexed_count = 0

    def _sync_index(self):
        """Rebuild the lookup cache if connection_list was changed from outside."""
        if self._indexed_count != len(self.connection_list):
            self._index_by_connection = dict()
            for idx, known_connection in enumerate(self.connection_list):
                # setdefault keeps the first occurrence, matching list.index().
                self._index_by_connection.setdefault(known_connection, idx)
            self._indexed_count = len(self.connection_list)

    def get_connection_index(self,packet):
        """Record ``packet`` against its connection and return that connection's index.

        The packet is matched first in its own direction and then in the
        reverse direction; if neither is known, a new connection (and a new
        ``packet_flow``) is appended and its index returned.
        """
        connection_string = "%s:%s,%s:%s,%s"%(packet.src_host,packet.src_port,packet.dst_host,packet.dst_port,packet.proto)
        connection_string_reversed = "%s:%s,%s:%s,%s"%(packet.dst_host,packet.dst_port,packet.src_host,packet.src_port,packet.proto)

        self._sync_index()
        connection_index = self._index_by_connection.get(connection_string)
        if connection_index is None:
            # We don't want to separate packet flows by uplink/downlink streams, combine them!
            connection_index = self._index_by_connection.get(connection_string_reversed)

        if connection_index is None:
            # Packet stream is completely new; packet_flow() already counts this packet.
            self.connection_list.append(connection_string)
            self.packet_flow_list.append(packet_flow(packet))
            connection_index = len(self.connection_list) - 1
            self._index_by_connection[connection_string] = connection_index
            self._indexed_count = len(self.connection_list)
            return connection_index

        self.packet_flow_list[connection_index].add_packet(packet)
        return connection_index

    def write_connection_map_to_file(self,file):
        """Write one line per connection to the path ``file`` (overwriting it).

        Connection Map File format is:
        connection_index,src_host:src_port,dst_host:dst_port,proto,packet_flow_total_bytes_transferred,packet_flow_duration
        """
        with open(file,'w') as connection_map_file:
            for idx,connection_string in enumerate(self.connection_list):
                flow = self.packet_flow_list[idx]
                fields = (str(idx), connection_string, str(flow.total_bytes), str(flow.get_flow_duration()))
                connection_map_file.write(",".join(fields) + '\n')

In [35]:
class packet:
    """One packet parsed from a two-line capture entry.

    Holds the endpoints, protocol, the raw capture text the packet was parsed
    from, the two length fields as integers, and the timestamp as a
    ``datetime``.
    """
    # Class-level defaults; __init__ replaces each of them on the instance.
    packet_string = ""
    src_host = ""
    src_port = ""
    dst_host = ""
    dst_port = ""
    proto = ""
    ip_length = 0
    tcp_length = 0
    packet_time = None

    def __init__(self,src_host,src_port,dst_host,dst_port,proto,packet_string,ip_length,tcp_length,packet_time_str):
        """Store the parsed fields.

        ``ip_length`` and ``tcp_length`` are converted with ``int()`` and
        ``packet_time_str`` is parsed with ``time_format_string``; either
        conversion raises ``ValueError`` on malformed text.
        """
        # Endpoints and protocol are kept exactly as captured.
        self.src_host, self.src_port = src_host, src_port
        self.dst_host, self.dst_port = dst_host, dst_port
        self.proto = proto
        # Raw capture text for this packet.
        self.packet_string = packet_string
        # Length fields arrive as text (regex groups) and are stored as ints.
        self.ip_length = int(ip_length)
        self.tcp_length = int(tcp_length)
        self.packet_time = datetime.strptime(packet_time_str,time_format_string)

    def to_string(self):
        """Print the endpoints and protocol, one ``name:value`` per line.

        Despite the name, nothing is returned; the text goes to stdout.
        """
        summary_fields = (self.src_host,self.src_port,self.dst_host,self.dst_port,self.proto)
        summary = "srcHost:%s\nsrcPort:%s\ndstHost:%s\ndstPort:%s\nproto:%s\n" % summary_fields
        print(summary)

In [36]:
def split_packet_flows(capture_file,split_dir,base_filename):
    """Split one capture file into one file per connection.

    Every packet matched in ``capture_file`` is written, as its raw two-line
    text, to ``<split_dir>/<base_filename>.<connection_index>.cap``. Once the
    whole capture has been read, ``<split_dir>/connections.map`` is written
    with one summary line per connection.

    Split files written by an earlier run are overwritten, not appended to, so
    calling this again on the same capture does not duplicate packets.
    NOTE: split files left by an earlier run for connection indexes that do not
    occur in this run are not removed.

    Parameters:
        capture_file: path of the text capture to read.
        split_dir: directory for the output; created (with parents) if missing.
        base_filename: prefix used for the per-connection file names.
    """
    packet_connection_map = connection_map()

    # Ensure that the split flows directory exists (including missing parents).
    os.makedirs(split_dir, exist_ok=True)

    # Connection indexes whose split file has already been (re)created during
    # this call; the first write truncates, later writes append.
    started_indexes = set()

    # Loop through the capture file searching for normal packets, ignore everything else.
    # A packet spans two lines, so each line is tested together with the one before it.
    previous_line = ''
    with open(capture_file,'r') as src:
        for line in src:
            line_pair = previous_line + line
            previous_line = line

            packet_match = re.match(packet_match_regex,line_pair,re.MULTILINE)
            if not packet_match:
                continue

            cur_packet = packet(
                packet_match.group('srcHost'),
                packet_match.group('srcPort'),
                packet_match.group('dstHost'),
                packet_match.group('dstPort'),
                packet_match.group('proto'),
                line_pair,
                packet_match.group('ipLength'),
                packet_match.group('tcpLength'),
                packet_match.group('packetTime'),
            )
            connection_idx = packet_connection_map.get_connection_index(cur_packet)

            split_file_path = os.path.join(split_dir, base_filename + '.' + str(connection_idx) + '.cap')
            if connection_idx in started_indexes:
                write_mode = 'a'
            else:
                write_mode = 'w'
                started_indexes.add(connection_idx)
            # Write packet to file with connection index
            with open(split_file_path,write_mode) as split_dst:
                split_dst.write(cur_packet.packet_string)
    packet_connection_map.write_connection_map_to_file(os.path.join(split_dir,'connections.map'))
def get_packets_in_file(capture_file):
    """Return a list of ``packet`` objects for every packet matched in ``capture_file``.

    A packet spans two consecutive lines of the capture, so each line is
    tested against ``packet_match_regex`` together with the line before it;
    line pairs that do not match are ignored.
    """
    packets = list()
    previous_line = ''
    with open(capture_file,'r') as src:
        for line in src:
            line_pair = previous_line + line
            previous_line = line

            packet_match = re.match(packet_match_regex,line_pair,re.MULTILINE)
            if packet_match:
                cur_packet = packet(
                    packet_match.group('srcHost'),
                    packet_match.group('srcPort'),
                    packet_match.group('dstHost'),
                    packet_match.group('dstPort'),
                    packet_match.group('proto'),
                    line_pair,
                    packet_match.group('ipLength'),
                    packet_match.group('tcpLength'),
                    # The packet constructor requires the timestamp text as its last argument.
                    packet_match.group('packetTime'),
                )
                packets.append(cur_packet)
    return packets

In [37]:
# Walk the capture tree, mirror its directory layout under capture_dst and
# split each ".cap" file into per-connection files.
try:
    for dirpath, _subdir_names, dir_filenames in os.walk(capture_src):
        # Mirror the directory relative to capture_src instead of using
        # str.replace, which would rewrite every occurrence of the source
        # directory name anywhere in the path.
        outdir = os.path.normpath(os.path.join(capture_dst, os.path.relpath(dirpath, capture_src)))
        os.makedirs(outdir, exist_ok=True)  # We don't care if the directory already exists
        for capture in dir_filenames:
            if not capture.endswith(".cap"):
                continue
            base_file = capture[:-4]
            src_file = os.path.join(dirpath, capture)
            split_dir = os.path.join(outdir, base_file)
            print(src_file)
            print(split_dir)
            print(base_file)
            split_packet_flows(src_file,split_dir,base_file)
            # NOTE: deliberate early stop -- only the first capture found is
            # processed. Remove this raise to process every capture.
            raise KeyboardInterrupt
except KeyboardInterrupt:
    pass
            
    #print(outdir)

captures\games\hon\hon_raw_capture10.cap
captures_split\games\hon\hon_raw_capture10
hon_raw_capture10


In [53]:
#Isolate the outputs into <src_name>.connectionNumber.cap
##Then plot the connections onto a single graph.
###The dominant connection in a packet capture is the one you keep