In [10]:
import matplotlib
import re
import os
import sys
import time
from datetime import datetime
import numpy as np

##CurrentStep: Figure out file management for NTIs

'''
Process:
    1) Ingest pre-processed packet stream
    2) rewrite packet stream into network traffic images using predefined patterns
'''

'\nProcess:\n    1) Ingest pre-processed packet stream\n    2) rewrite packet stream into network traffic images using predefined patterns\n'

In [11]:
# Root directory that is walked for '*.cap' capture files.
capture_src = 'captures_split'
# Root directory for the generated output; the source tree layout is mirrored
# underneath it by swapping capture_src for capture_dst in each path.
capture_dst = 'captures_networkTrafficImages'
# Matches one packet that spans two consecutive text lines:
#   line 1: timestamp (packetTime), the literal 'IP', 'proto <name>' (proto)
#           and 'length <n>' (ipLength)
#   line 2: '<srcHost>.<srcPort> > <dstHost>.<dstPort>' followed later by
#           'length <n>' (tcpLength)
# The host groups are greedy, so the port is whatever follows the last dot.
packet_match_regex = r'(?P<packetTime>\d+-\d+-\d+\W\d+:\d+:\d+\.\d+)\WIP.*proto\W(?P<proto>\w+).*length\W(?P<ipLength>\d+).*\r?\n\W*(?P<srcHost>[\w\.\-]+)\.(?P<srcPort>[\w]+)\W\>\W(?P<dstHost>[\w\.\-]+)\.(?P<dstPort>[\w]+).*length\W(?P<tcpLength>\d+).*'
time_format_string = '%Y-%m-%d %H:%M:%S.%f' #Predefined time format string used in tcpdump captures (date, time, microseconds)

In [12]:
def get_connection_string(packet):
    """Return the connection key "src_host:src_port,dst_host:dst_port,proto".

    `packet` only needs the attributes src_host, src_port, dst_host,
    dst_port and proto; each one is rendered with %s formatting.
    """
    source_endpoint = "%s:%s" % (packet.src_host, packet.src_port)
    destination_endpoint = "%s:%s" % (packet.dst_host, packet.dst_port)
    return "%s,%s,%s" % (source_endpoint, destination_endpoint, packet.proto)
def get_connection_string_reversed(packet):
    """Return the connection key with source and destination swapped.

    The result has the form "dst_host:dst_port,src_host:src_port,proto",
    i.e. the key the same connection gets for traffic in the other direction.
    """
    destination_endpoint = "%s:%s" % (packet.dst_host, packet.dst_port)
    source_endpoint = "%s:%s" % (packet.src_host, packet.src_port)
    return "%s,%s,%s" % (destination_endpoint, source_endpoint, packet.proto)
def time_string_to_datetime(time_string, time_format=None):
    """Parse a timestamp string into a datetime.

    Args:
        time_string: Text to parse.
        time_format: Optional strptime format. When omitted, the module-level
            time_format_string is used, which is the previous behaviour.

    Returns:
        The parsed datetime.

    Raises:
        ValueError: If time_string does not match the format.
    """
    if time_format is None:
        # Resolved at call time so a changed module-level format is honoured.
        time_format = time_format_string
    return datetime.strptime(time_string, time_format)
def datetime_to_timestring(datetime_time, time_format=None):
    """Format a datetime as a timestamp string.

    Args:
        datetime_time: The datetime to format.
        time_format: Optional strftime format. When omitted, the module-level
            time_format_string is used, which is the previous behaviour.

    Returns:
        The formatted timestamp text.
    """
    if time_format is None:
        # Resolved at call time so a changed module-level format is honoured.
        time_format = time_format_string
    return datetime_time.strftime(time_format)

In [13]:
class packet:
    """One packet parsed from a text capture.

    Attributes:
        packet_string: The raw text the packet was parsed from.
        src_host, src_port: Source endpoint, stored as given.
        dst_host, dst_port: Destination endpoint, stored as given.
        proto: Protocol name, stored as given.
        ip_length: Length value converted with int().
        tcp_length: Length value converted with int().
        packet_time: datetime parsed from the timestamp text using the
            module-level time_format_string.
    """
    # Class-level defaults; every instance built through __init__ overrides them.
    packet_string = ""
    src_host = ""
    src_port = ""
    dst_host = ""
    dst_port = ""
    proto = ""
    ip_length = 0
    tcp_length = 0
    packet_time = None
    def __init__(self,src_host,src_port,dst_host,dst_port,proto,packet_string,ip_length,tcp_length,packet_time_str):
        """Store the parsed fields, converting the lengths to int and the time to datetime.

        Raises:
            ValueError: If a length is not an integer literal or the timestamp
                does not match time_format_string.
        """
        self.src_host = src_host
        self.src_port = src_port
        self.dst_host = dst_host
        self.dst_port = dst_port
        self.proto = proto
        self.packet_string = packet_string
        self.ip_length = int(ip_length)
        self.tcp_length = int(tcp_length)
        self.packet_time = datetime.strptime(packet_time_str,time_format_string)
    def to_string(self):
        """Print a five-line summary of the endpoints and protocol, and return it.

        The method used to print only and return None, which made the name
        misleading. The text is still printed so existing callers see the same
        output, and it is now also returned so it can be logged or compared.
        """
        summary = "srcHost:%s\nsrcPort:%s\ndstHost:%s\ndstPort:%s\nproto:%s\n" % (self.src_host,self.src_port,self.dst_host,self.dst_port,self.proto)
        print(summary)
        return summary

In [14]:
def get_packets_in_file(capture_file):
    """Parse every packet found in a text capture file.

    The file is scanned with a sliding window of two consecutive lines; each
    window is tested against packet_match_regex, and every window that matches
    from its start yields one `packet`.

    Args:
        capture_file: Path of the text capture to read.

    Returns:
        A list of `packet` objects in file order (empty when nothing matches).
    """
    parsed_packets = []
    previous_line = ''
    with open(capture_file,'r') as capture:
        for current_line in capture:
            # Window = previous line + current line; then slide forward by one.
            window = previous_line + current_line
            previous_line = current_line

            found = re.match(packet_match_regex,window,re.MULTILINE)
            if not found:
                continue
            parsed_packets.append(packet(
                src_host=found.group('srcHost'),
                src_port=found.group('srcPort'),
                dst_host=found.group('dstHost'),
                dst_port=found.group('dstPort'),
                proto=found.group('proto'),
                packet_string=window,
                ip_length=found.group('ipLength'),
                tcp_length=found.group('tcpLength'),
                packet_time_str=found.group('packetTime'),
            ))
    return parsed_packets

In [19]:
def create_directory(directory):
    """Create `directory`, including any missing parent directories.

    An already existing directory is not an error. The previous os.mkdir call
    raised FileNotFoundError whenever a parent directory was missing, which
    made nested output paths fail.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)
def create_empty_2d_array(array_x,array_y):
    """Return a zero-filled integer numpy array with array_x rows and array_y columns."""
    shape = (array_x, array_y)  # numpy shapes are (n_rows, n_columns)
    return np.zeros(shape, dtype=int)
def write_2d_array_to_file(array_2d,output_file):
    """Write a 2-D array to a text file, one row per line.

    Elements of a row are separated by single spaces and rows by newlines;
    there is no trailing space or trailing newline.

    The earlier version first overwrote cells [0][0] and [0][1] with the debug
    values 1 and 2. That corrupted the caller's array and the written image,
    and raised on arrays with fewer than two columns. The input is no longer
    modified.

    Args:
        array_2d: Sequence of rows (for example a numpy 2-D array or a list of lists).
        output_file: Path of the file to create or overwrite.
    """
    rendered_rows = (" ".join(str(value) for value in row) for row in array_2d)
    with open(output_file,'w') as nti_file:
        nti_file.write("\n".join(rendered_rows))
def arrange_packets_as_linear_map(packets,array_2d,packet_start_index,packet_end_index):
    """Fill array_2d in place, row by row, with packet IP lengths.

    Packets packets[packet_start_index:packet_end_index] are laid out left to
    right, top to bottom. The stored value is each packet's `ip_length`, the
    same value generate_linear_ntis2 writes for its linear layout.
    NOTE(review): confirm ip_length is the intended pixel value.

    Packets beyond the array's capacity are ignored; cells with no
    corresponding packet keep their current value.

    Args:
        packets: Sequence of objects exposing an `ip_length` attribute.
        array_2d: Mutable 2-D array (numpy array or list of lists) to fill.
        packet_start_index: Index of the first packet to place (inclusive).
        packet_end_index: Index one past the last packet to place.
    """
    row_count = len(array_2d)
    if row_count == 0:
        return
    column_count = len(array_2d[0])
    if column_count == 0:
        return
    capacity = row_count * column_count
    window = packets[packet_start_index:packet_end_index][:capacity]
    for cell_index, current_packet in enumerate(window):
        # Row-major placement: the quotient selects the row, the remainder the column.
        array_2d[cell_index // column_count][cell_index % column_count] = current_packet.ip_length
def arrange_packets_as_center_spiral(packets,array_2d,packet_start_index,packet_end_index):
    """Placeholder: currently does nothing and returns None; array_2d is left untouched.

    NOTE(review): going by the name, this is presumably meant to place the
    packets along a spiral that starts at the centre of array_2d - confirm the
    intended layout before implementing.
    """
    pass #TODO: Arrange packets as a center spiral in the 2d array (not implemented yet).
def arrange_packets_as_edge_spiral(packets,array_2d,packet_start_index,packet_end_index):
    """Placeholder: currently does nothing and returns None; array_2d is left untouched.

    NOTE(review): going by the name, this is presumably meant to place the
    packets along a spiral that starts at the edge of array_2d - confirm the
    intended layout before implementing.
    """
    pass #TODO: Arrange packets as an edge spiral in the 2d array (not implemented yet).
def arrange_packets_as_waterfall(packets,array_2d,packet_start_index,packet_end_index):
    """Placeholder: currently does nothing and returns None; array_2d is left untouched.

    NOTE(review): the "waterfall" layout is not defined anywhere in the visible
    code - confirm the intended arrangement before implementing.
    """
    pass #TODO: Arrange packets as a waterfall in the 2d array (not implemented yet).
def generate_linear_ntis(packet_list,nti_x,nti_y,output_dir,base_filename,debug_limit=300):
    """Write one NTI file per sliding window of nti_x * nti_y packets.

    The window advances one packet at a time. Window k covers
    packet_list[k : k + nti_x * nti_y] and is written to
    '<output_dir>/<base_filename>.nti-<k>.nti'.

    Fixes relative to the earlier version:
      * the packets come from the `packet_list` argument; previously the
        module-level name `packets` was read, so the argument was ignored;
      * a window is produced when exactly nti_x * nti_y packets remain (the
        old `>` comparison dropped the last full window);
      * the hard-coded debug abort after 300 files is now the `debug_limit`
        parameter.

    Args:
        packet_list: Sequence of parsed packets.
        nti_x: Number of rows of each image.
        nti_y: Number of columns of each image.
        output_dir: Directory that receives the files; created when at least
            one image will be written.
        base_filename: File name stem used for every image.
        debug_limit: Leftover debugging guard, kept at 300 so existing runs
            behave as before. After this many files KeyboardInterrupt is
            raised. Pass None to process the whole capture.

    Raises:
        KeyboardInterrupt: When `debug_limit` files have been written.
    """
    nti_area = nti_x * nti_y
    if nti_area <= 0 or len(packet_list) < nti_area:
        return  # Not enough packets for even a single image.

    create_directory(output_dir)
    nti_array = create_empty_2d_array(nti_x,nti_y)

    last_window_start = len(packet_list) - nti_area
    for window_start in range(last_window_start + 1):
        #First fill up the 2d array
        arrange_packets_as_linear_map(packet_list,nti_array,window_start,window_start+nti_area)

        #Then write the 2D array to file (that is the NTI)
        output_nti_file = os.path.join(output_dir, base_filename + '.nti-' + str(window_start) + '.nti')
        write_2d_array_to_file(nti_array,output_nti_file)

        #Zero out the nti_array for the next NTI to be generated
        nti_array.fill(0) #reusing the buffer avoids allocating a new array per image

        #debug guard: stop once the configured number of images has been written
        if debug_limit is not None and window_start + 1 == debug_limit:
            raise KeyboardInterrupt
    
def generate_linear_ntis2(packet_list,nti_x,nti_y,output_dir,base_filename,debug_limit=300):
    """Write one NTI text file per sliding window of nti_x * nti_y packets.

    Each file holds the `ip_length` of the window's packets in row-major
    order: values separated by single spaces, rows separated by newlines, with
    no trailing separator. Window k covers packet_list[k : k + nti_x * nti_y]
    and is written to '<output_dir>/<base_filename>.nti-<k>.nti'.

    Fixes relative to the earlier version:
      * a window is produced when exactly nti_x * nti_y packets remain (the
        old `>` comparison dropped the last full window);
      * the separator logic used nti_x both as row length and as row count,
        which garbled the output whenever nti_x != nti_y. Images now have
        nti_x rows of nti_y values, the same orientation as the array built by
        generate_linear_ntis. NOTE(review): confirm this orientation; square
        images are unaffected;
      * the hard-coded debug abort after 300 files is now the `debug_limit`
        parameter.

    Args:
        packet_list: Sequence of objects exposing an `ip_length` attribute.
        nti_x: Number of rows of each image.
        nti_y: Number of values per row.
        output_dir: Directory that receives the files; created (with parents)
            when at least one image will be written.
        base_filename: File name stem used for every image.
        debug_limit: Leftover debugging guard, kept at 300 so existing runs
            behave as before. After this many files KeyboardInterrupt is
            raised. Pass None to process the whole capture.

    Raises:
        KeyboardInterrupt: When `debug_limit` files have been written.
    """
    nti_area = nti_x * nti_y
    if nti_area <= 0 or len(packet_list) < nti_area:
        return  # Not enough packets for even a single image.

    os.makedirs(output_dir, exist_ok=True)

    last_window_start = len(packet_list) - nti_area
    for window_start in range(last_window_start + 1):
        rendered_rows = []
        for row_index in range(nti_x):
            row_start = window_start + row_index * nti_y
            row_packets = packet_list[row_start:row_start + nti_y]
            rendered_rows.append(' '.join(str(row_packet.ip_length) for row_packet in row_packets))

        output_nti_file = os.path.join(output_dir, base_filename + '.nti-' + str(window_start) + '.nti')
        with open(output_nti_file,'w') as nti_file:
            nti_file.write('\n'.join(rendered_rows))

        #debug guard: stop once the configured number of images has been written
        if debug_limit is not None and window_start + 1 == debug_limit:
            raise KeyboardInterrupt
    
    


In [20]:
# Walk the capture tree, mirror its directory layout under capture_dst and
# generate 16x16 NTIs for every '*.cap' file found.
try:
    for dirpath, _subdir_names, filenames in os.walk(capture_src): #each os.walk element is: dirpath, subdir-names, dir-filenames
        # Only the leading capture_src is swapped. An unbounded replace would
        # also rewrite directory or file names that merely contain that text.
        outdir = dirpath.replace(capture_src,capture_dst,1)
        os.makedirs(outdir, exist_ok=True) #We don't care if the directory already exists
        for capture in filenames:
            if not capture.endswith(".cap"):
                continue
            base_file = capture[:-4] #strip the '.cap' extension
            date_file = base_file + '.date' #not used by the visible cells; kept in case a later cell reads it
            src_file = os.path.join(dirpath, capture)
            output_dir = os.path.join(outdir, base_file)
            print(src_file)
            print(output_dir)
            print(base_file)
            packets = get_packets_in_file(src_file)
            generate_linear_ntis(packets,16,16,output_dir,base_file)

except KeyboardInterrupt:
    # Raised by a manual interrupt or by the debug guard inside the NTI generator.
    print("Execution Stopped due to KeyboardInterrupt")
            

captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.0.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.0
hon_raw_capture10.0
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.1.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.1
hon_raw_capture10.1
Execution Stopped due to KeyboardInterrupt


In [53]:
#Isolate the outputs into <src_name>.connectionNumber.cap
##Then plot the connections onto a single graph.
###The dominant connection in a packet capture is the one you keep