In [10]:
import matplotlib
import re
import os
import sys
import time
from datetime import datetime, timedelta
import numpy as np

##CurrentStep: Program global time-based NTI logic -- Maybe make a packet iterator with time parameters.

#Just a note-- Consider heuristics to determine whether a time based NTI is valid --
#        IE, only accept an NTI if a minimum of 30% of the time slots have actual packets in them.
#            Something about taking a bad image in your thesis write-up or something like that heh ;)

'''
Process:
    1) Ingest pre-processed packet stream
    2) rewrite packet stream into network traffic images using predefined patterns
'''

'\nProcess:\n    1) Ingest pre-processed packet stream\n    2) rewrite packet stream into network traffic images using predefined patterns\n'

In [2]:
# Root directory that the driver cell walks (os.walk) looking for '.cap' capture files.
capture_src = 'captures_split'
# Root of the output tree: the driver cell substitutes this for capture_src in every path it derives.
capture_dst = 'captures_networkTrafficImages'
# Matches one packet record spanning two consecutive capture lines.
# Named groups: packetTime, proto, ipLength (first line); srcHost, srcPort,
# dstHost, dstPort, tcpLength (second line).
packet_match_regex = r'(?P<packetTime>\d+-\d+-\d+\W\d+:\d+:\d+\.\d+)\WIP.*proto\W(?P<proto>\w+).*length\W(?P<ipLength>\d+).*\r?\n\W*(?P<srcHost>[\w\.\-]+)\.(?P<srcPort>[\w]+)\W\>\W(?P<dstHost>[\w\.\-]+)\.(?P<dstPort>[\w]+).*length\W(?P<tcpLength>\d+).*'
# strptime/strftime pattern shared by the packet class and the time helper functions.
time_format_string = '%Y-%m-%d %H:%M:%S.%f' #Predefined Time format string used in tcpdump captures

In [3]:
def get_connection_string(packet):
    """Build the 'src_host:src_port,dst_host:dst_port,proto' key for a packet.

    `packet` is any object exposing src_host, src_port, dst_host, dst_port
    and proto attributes.
    """
    forward_fields = (
        packet.src_host,
        packet.src_port,
        packet.dst_host,
        packet.dst_port,
        packet.proto,
    )
    return "%s:%s,%s:%s,%s" % forward_fields
def get_connection_string_reversed(packet):
    """Build the connection key with source and destination swapped.

    The result has the form 'dst_host:dst_port,src_host:src_port,proto'.
    """
    reversed_fields = (
        packet.dst_host,
        packet.dst_port,
        packet.src_host,
        packet.src_port,
        packet.proto,
    )
    return "%s:%s,%s:%s,%s" % reversed_fields
def time_string_to_datetime(time_string):
    """Parse `time_string` with the module-level `time_format_string` and return a datetime."""
    parsed_time = datetime.strptime(time_string, time_format_string)
    return parsed_time
def datetime_to_timestring(datetime_time):
    """Format `datetime_time` with the module-level `time_format_string` and return the text."""
    formatted_time = datetime_time.strftime(time_format_string)
    return formatted_time

In [4]:
class packet:
    """One packet record parsed from a pre-processed capture file.

    Attributes:
        packet_string: the raw text the record was parsed from.
        src_host, src_port, dst_host, dst_port, proto: connection fields, stored as given.
        ip_length, tcp_length: length fields, converted to int.
        packet_time: datetime parsed from the timestamp text using `time_format_string`.
    """
    # Class-level defaults; every instance overwrites them in __init__.
    packet_string = ""
    src_host = ""
    src_port = ""
    dst_host = ""
    dst_port = ""
    proto = ""
    ip_length = 0
    tcp_length = 0
    packet_time = None
    def __init__(self,src_host,src_port,dst_host,dst_port,proto,packet_string,ip_length,tcp_length,packet_time_str):
        """Store the parsed fields.

        `ip_length` and `tcp_length` must be convertible with int();
        `packet_time_str` must match `time_format_string` (ValueError otherwise).
        """
        self.src_host = src_host
        self.src_port = src_port
        self.dst_host = dst_host
        self.dst_port = dst_port
        self.proto = proto
        self.packet_string = packet_string
        self.ip_length = int(ip_length)
        self.tcp_length = int(tcp_length)
        self.packet_time = datetime.strptime(packet_time_str,time_format_string)
    def __str__(self):
        """Return the connection summary text that to_string() prints."""
        return "srcHost:%s\nsrcPort:%s\ndstHost:%s\ndstPort:%s\nproto:%s\n" % (self.src_host,self.src_port,self.dst_host,self.dst_port,self.proto)
    def to_string(self):
        """Print the connection summary (kept for existing callers); returns None.

        Use str(packet_instance) to obtain the same text without printing it.
        """
        print(str(self))

In [5]:
def get_packets_in_file(capture_file):
    """Parse every packet record found in `capture_file`.

    The file is read line by line; each line is glued to the line before it
    and the two-line text is matched against `packet_match_regex`. Every
    match yields one `packet` object. Returns the packets in file order.
    """
    parsed_packets = []
    previous_line = ''
    with open(capture_file,'r') as capture:
        for current_line in capture:
            # A record spans two lines, so always test the (previous, current) pair.
            candidate = previous_line + current_line
            previous_line = current_line

            hit = re.match(packet_match_regex,candidate,re.MULTILINE)
            if not hit:
                continue
            fields = hit.groupdict()
            parsed_packets.append(
                packet(
                    fields['srcHost'],
                    fields['srcPort'],
                    fields['dstHost'],
                    fields['dstPort'],
                    fields['proto'],
                    candidate,
                    fields['ipLength'],
                    fields['tcpLength'],
                    fields['packetTime'],
                )
            )
    return parsed_packets

In [6]:
def create_directory(directory):
    """Create `directory`, including any missing parent directories.

    An already existing path is not an error. os.makedirs replaces os.mkdir
    so that a nested output path no longer fails with FileNotFoundError when
    its parent has not been created yet.
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except FileExistsError:
        pass #Path exists but is not a directory: stay silent, as the original did
def create_empty_2d_array(array_x,array_y):
    """Return a zero-filled integer numpy array with `array_x` rows and `array_y` columns."""
    shape = (array_x, array_y) #numpy shape order is (n_rows, n_columns)
    return np.zeros(shape, dtype=int)
def write_2d_array_to_file(array_2d,output_file):
    """Write `array_2d` to `output_file` as text.

    Elements of a row are separated by single spaces and rows by newlines;
    there is no trailing space on a row and no trailing newline at the end.
    """
    with open(output_file,'w') as nti_file:
        rendered_rows = []
        for row_idx in range(len(array_2d)):
            row_width = len(array_2d[row_idx])
            cells = [str(array_2d[row_idx][col_idx]) for col_idx in range(row_width)]
            rendered_rows.append(" ".join(cells))
        nti_file.write('\n'.join(rendered_rows))
def arrange_packets_as_linear_map(packets,array_2d,packet_start_index,packet_end_index):
    """Fill `array_2d` row by row with the ip_length of consecutive packets.

    Cell (row, col) receives packets[packet_start_index + row*n_columns + col].ip_length.
    `packet_end_index` is accepted for signature parity with the other
    arrangement functions and is not used. Modifies `array_2d` in place and
    returns None; raises IndexError if `packets` is too short.

    Fixes: the row offset was computed from the row count instead of the
    column count (wrong for non-square arrays), and an unconditional debug
    print / `raise KeyboardInterrupt` kept the function from ever returning.
    """
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    for row in range(x_dim):
        row_offset = packet_start_index + row*y_dim #one full row holds y_dim packets
        for col in range(y_dim):
            array_2d[row][col] = packets[row_offset+col].ip_length #ip_packet_length is our targeted feature
def arrange_packets_as_center_spiral(packets,array_2d,packet_start_index,packet_end_index):
    """Fill `array_2d` so that the packet sequence spirals outward from the centre.

    The cells are visited exactly like an edge spiral (clockwise rings from
    the top-left corner inward) but the packets are placed backwards: the
    first visited cell gets packets[packet_end_index-1] and each following
    cell gets the packet before it. `packet_start_index` is not used.

    The array must be square with an odd side length; otherwise an error is
    printed and the function returns without touching the array. Modifies
    `array_2d` in place and returns None.

    Fix: the unconditional debug print / `raise KeyboardInterrupt` at the end
    was removed so the function can be used inside a generation loop.
    """
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    if(x_dim % 2 == 0 or y_dim % 2 == 0 or x_dim != y_dim): #Dimension sanity check
        print("ERROR: Spiral 2D Array Dimensions must be ODD and have equal x and y dimensions!")
        return
    packet_index = packet_end_index-1 #packets are consumed from the end backwards
    first = 0
    last = x_dim-1
    while(first <= last):
        #Cells of the current ring, clockwise, each cell exactly once
        ring = [(first,col) for col in range(first,last+1)]           #top side, left to right
        ring += [(row,last) for row in range(first+1,last+1)]         #right side, downwards
        ring += [(last,col) for col in range(last-1,first-1,-1)]      #bottom side, right to left
        ring += [(row,first) for row in range(last-1,first,-1)]       #left side, upwards (corner excluded)
        for row,col in ring:
            array_2d[row][col] = packets[packet_index].ip_length
            packet_index -= 1
        first += 1
        last -= 1
def arrange_packets_as_edge_spiral(packets,array_2d,packet_start_index,packet_end_index):
    """Fill `array_2d` so that the packet sequence spirals inward from the edge.

    Starting at the top-left corner, cells are visited ring by ring in
    clockwise order; the n-th visited cell receives
    packets[packet_start_index + n].ip_length. `packet_end_index` is not used.

    The array must be square with an odd side length; otherwise an error is
    printed and the function returns without touching the array. Modifies
    `array_2d` in place and returns None.

    Fix: the unconditional debug print / `raise KeyboardInterrupt` at the end
    was removed so the function can be used inside a generation loop.
    """
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    if(x_dim % 2 == 0 or y_dim % 2 == 0 or x_dim != y_dim): #Dimension sanity check
        print("ERROR: Spiral 2D Array Dimensions must be ODD and have equal x and y dimensions!")
        return
    packet_index = packet_start_index
    first = 0
    last = x_dim-1
    while(first <= last):
        #Cells of the current ring, clockwise, each cell exactly once
        ring = [(first,col) for col in range(first,last+1)]           #top side, left to right
        ring += [(row,last) for row in range(first+1,last+1)]         #right side, downwards
        ring += [(last,col) for col in range(last-1,first-1,-1)]      #bottom side, right to left
        ring += [(row,first) for row in range(last-1,first,-1)]       #left side, upwards (corner excluded)
        for row,col in ring:
            array_2d[row][col] = packets[packet_index].ip_length
            packet_index += 1
        first += 1
        last -= 1
        
def arrange_packets_as_waterfall(packets,array_2d,packet_start_index,packet_end_index):
    """Fill `array_2d` in a 'waterfall' pattern growing from the top-left corner.

    For every index k along the main diagonal the cell (k, k) is filled
    first; then, moving away from the diagonal one step at a time, the cell
    above it in column k and the cell left of it in row k are filled
    alternately. Packets are consumed consecutively starting at
    `packet_start_index`; `packet_end_index` is not used.

    Only the row count drives the traversal, so the array is expected to be
    square. Modifies `array_2d` in place and returns None.

    Fix: the unconditional debug print / `raise KeyboardInterrupt` at the end
    was removed so the function can be used inside a generation loop.
    """
    x_dim = len(array_2d)
    next_packet = packet_start_index
    for diag in range(x_dim):
        array_2d[diag][diag] = packets[next_packet].ip_length #ip_packet_length is our targeted feature
        next_packet += 1
        for step in range(1,diag+1):
            array_2d[diag-step][diag] = packets[next_packet].ip_length #cell `step` rows above the diagonal
            next_packet += 1
            array_2d[diag][diag-step] = packets[next_packet].ip_length #cell `step` columns left of the diagonal
            next_packet += 1
def arrange_packets_as_diagonal(packets,array_2d,packet_start_index,packet_end_index):
    """Fill `array_2d` along its anti-diagonals, starting at the top-left corner.

    Anti-diagonals (cells with equal row+col) are processed in order of
    increasing row+col; within each one the cells are visited from the
    bottom-left end to the top-right end. Packets are consumed consecutively
    starting at `packet_start_index`; `packet_end_index` is not used.

    Only the row count drives the traversal, so the array is expected to be
    square. Modifies `array_2d` in place and returns None.

    Fix: the unconditional debug print / `raise KeyboardInterrupt` at the end
    was removed so the function can be used inside a generation loop.
    """
    x_dim = len(array_2d)
    next_packet = packet_start_index
    for diag_sum in range(2*x_dim-1):
        lowest_row = min(diag_sum,x_dim-1)     #bottom-left end of this anti-diagonal
        highest_row = max(0,diag_sum-x_dim+1)  #top-right end of this anti-diagonal
        for row in range(lowest_row,highest_row-1,-1):
            array_2d[row][diag_sum-row] = packets[next_packet].ip_length #ip_packet_length is our targeted feature
            next_packet += 1
def generate_ntis(packet_list,nti_x,nti_y,output_dir,base_filename,nti_packet_arrangement,max_ntis=300):
    """Write sliding-window network traffic images (NTIs) for `packet_list`.

    One NTI of nti_x rows by nti_y columns is produced for every start index
    that still has nti_x*nti_y packets after it (the window advances one
    packet at a time). Each NTI is written to
    '<output_dir>/<base_filename>.nti-<start index>.nti'.

    Parameters:
        packet_list: packets to arrange (each exposes `ip_length`).
        nti_x, nti_y: NTI dimensions (rows, columns).
        output_dir: directory that receives the files; created when at least
            one NTI can be generated.
        base_filename: file name prefix.
        nti_packet_arrangement: 'linear', 'edge_spiral', 'center_spiral',
            'waterfall' or 'diagonal'.
        max_ntis: debug cap inherited from the original code. When that many
            NTIs have been written KeyboardInterrupt is raised, which the
            driver cell treats as "stop". Pass None to disable the cap.

    Raises:
        KeyboardInterrupt: unsupported arrangement (after printing an error),
            or `max_ntis` reached.

    Fixes: the arrangement functions were handed the notebook-global
    `packets` instead of the `packet_list` argument; and the window test used
    '>' so the last complete window (exactly nti_x*nti_y packets left) was
    never generated.
    """
    arrangement_functions = {
        'linear': arrange_packets_as_linear_map,
        'edge_spiral': arrange_packets_as_edge_spiral,
        'center_spiral': arrange_packets_as_center_spiral,
        'waterfall': arrange_packets_as_waterfall,
        'diagonal': arrange_packets_as_diagonal,
    }
    nti_area = nti_x * nti_y

    if(len(packet_list) < nti_area):
        return #Not enough packets for even a single NTI

    arrange_packets = arrangement_functions.get(nti_packet_arrangement)
    if(arrange_packets is None):
        print("ERROR: Unsupported packet arrangement detected!")
        raise KeyboardInterrupt

    create_directory(output_dir)
    nti_array = create_empty_2d_array(nti_x,nti_y)

    cur_packet_list_index = 0
    while((len(packet_list)-cur_packet_list_index) >= nti_area): #Case: There are enough packets to generate a new NTI
        #First fill up the 2d array
        arrange_packets(packet_list,nti_array,cur_packet_list_index,cur_packet_list_index+nti_area)

        #Then Write the 2D array to file (that is the NTI)
        output_nti_file = os.path.join(output_dir, base_filename + '.nti-'+str(cur_packet_list_index) + '.nti')
        write_2d_array_to_file(nti_array,output_nti_file)

        #Zero out the nti_array for the next NTI to be generated
        nti_array.fill(0) #verified faster than declaring new np.zeros 2d-array

        #increment the current NTI index
        cur_packet_list_index += 1

        #debug cap (see max_ntis)
        if(max_ntis is not None and cur_packet_list_index == max_ntis):
            raise KeyboardInterrupt


In [17]:
def get_packet_indices_for_next_time_interval(packets,current_packet_index,previous_time,time_interval_duration):
    """Find the packets that fall into the next NTI time interval.

    The interval ends at previous_time + time_interval_duration. Starting at
    `current_packet_index`, every packet whose `packet_time` is not later
    than that end belongs to the interval.

    Parameters:
        packets: packet sequence, assumed to be ordered by ascending packet_time.
        current_packet_index: index of the first packet not yet consumed.
        previous_time: datetime.datetime marking the start of the interval.
        time_interval_duration: datetime.timedelta length of the interval.

    Returns:
        (packet_start_index, packet_end_index, time_interval_end) where
        packets[packet_start_index:packet_end_index] are the interval's
        packets (half-open; the range is empty when no packet falls inside)
        and packet_end_index is the index to continue from.
        (-1, -1, time_interval_end) when the packet stream runs out before a
        packet later than the interval end is seen -- the interval cannot be
        confirmed as complete, so the NTI is invalid.

    Fixes: both bounds checks were inverted (the function returned the
    sentinel whenever packets remained and indexed past the end otherwise),
    and the end index is now exclusive, matching how the averaging helper
    iterates range(start, end).
    """
    time_interval_end = previous_time + time_interval_duration
    packet_start_index = current_packet_index
    packet_end_index = current_packet_index
    packet_count = len(packets)

    #Advance past every packet that still lies inside the interval
    while(packet_end_index < packet_count and packets[packet_end_index].packet_time <= time_interval_end):
        packet_end_index+=1 #Another packet is found to be in the NTI

    if(packet_end_index >= packet_count):
        return -1,-1,time_interval_end #Packets end before time interval is satisfied -- INVALID NTI!
    return packet_start_index,packet_end_index,time_interval_end
def get_average_packet_ip_header_length_for_packets(packets,packet_start_index,packet_end_index):
    """Return the rounded mean `ip_length` of packets[packet_start_index:packet_end_index].

    The range is half-open. An empty (or inverted) range yields 0 instead of
    dividing by zero.

    Fix: the loop accumulated into the undefined name `packet_size_total`
    (UnboundLocalError on the first packet) instead of the declared total.
    """
    num_packets = packet_end_index - packet_start_index
    if(num_packets <= 0): #Avoid division by zero exception
        return 0
    packets_ip_length_total = sum(packets[idx].ip_length for idx in range(packet_start_index,packet_end_index))
    return round(packets_ip_length_total / num_packets)
def arrange_packets_as_linear_map_time_based(packets,array_2d,packet_start_index,packet_end_index,nti_time_range_ms=10000):
    """Fill `array_2d` row by row, one pixel per time slot.

    The NTI covers `nti_time_range_ms` milliseconds beginning at the
    timestamp of packets[packet_start_index]; that span is divided evenly
    over the pixels. Each pixel receives the rounded mean ip_length of the
    packets whose timestamps fall into its slot (0 for an empty slot).
    `packet_end_index` is accepted for signature parity and is not used.

    Returns:
        0 on success, -1 when the packet stream ends before the whole time
        range is covered (the array may then be partially filled).

    Fixes: the function always started from packet 0 and ignored
    `packet_start_index`; the packet cursor was never advanced from one
    pixel to the next; and an unconditional debug print /
    `raise KeyboardInterrupt` made the final `return 0` unreachable.
    """
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    nti_area = x_dim * y_dim
    #Number of milliseconds per NTI pixel == time range / number of pixels
    nti_time_interval = timedelta(milliseconds=nti_time_range_ms / nti_area)

    if(packet_start_index >= len(packets)):
        return -1 #Nothing left to arrange

    next_packet_idx = packet_start_index
    current_nti_time_interval_start = packets[packet_start_index].packet_time
    for row in range (0,x_dim):
        for col in range (0,y_dim):
            slot_start_idx,slot_end_idx,current_nti_time_interval_start = get_packet_indices_for_next_time_interval(packets,next_packet_idx,current_nti_time_interval_start,nti_time_interval)
            if(slot_start_idx==-1):#We have reached the end of the packet stream given the time interval!
                return -1 #sentinel value for reached end of stream
            array_2d[row][col] = get_average_packet_ip_header_length_for_packets(packets,slot_start_idx,slot_end_idx)
            next_packet_idx = slot_end_idx #the next slot continues where this one stopped
    return 0 #success
def generate_time_relative_ntis(packet_list,nti_x,nti_y,output_dir,base_filename,nti_packet_arrangement,nti_time_range_ms=10000,max_ntis=300):
    """Write time-relative network traffic images (NTIs) for `packet_list`.

    Works like the count-based generator (one NTI per start index, written to
    '<output_dir>/<base_filename>.nti-<start index>.nti') except that the
    'linear' arrangement is time based: each pixel covers an equal share of
    `nti_time_range_ms`. The other arrangements have no time-based variant
    yet and use their count-based functions.

    Parameters:
        packet_list: packets to arrange.
        nti_x, nti_y: NTI dimensions (rows, columns).
        output_dir: directory that receives the files; created when the
            packet count allows at least one NTI.
        base_filename: file name prefix.
        nti_packet_arrangement: 'linear', 'edge_spiral', 'center_spiral',
            'waterfall' or 'diagonal'.
        nti_time_range_ms: time span covered by one NTI, in milliseconds.
        max_ntis: debug cap inherited from the original code. When that many
            NTIs have been written KeyboardInterrupt is raised, which the
            driver cell treats as "stop". Pass None to disable the cap.

    Generation stops silently as soon as an arrangement function reports -1
    (packet stream ended inside the time range).

    Raises:
        KeyboardInterrupt: unsupported arrangement (after printing an error),
            or `max_ntis` reached.

    Fixes: the arrangement functions were handed the notebook-global
    `packets` instead of the `packet_list` argument; and the packet-count
    test used '>' so a final window of exactly nti_x*nti_y packets was skipped.
    """
    count_based_arrangements = {
        'edge_spiral': arrange_packets_as_edge_spiral,
        'center_spiral': arrange_packets_as_center_spiral,
        'waterfall': arrange_packets_as_waterfall,
        'diagonal': arrange_packets_as_diagonal,
    }
    nti_area = nti_x * nti_y

    if(len(packet_list) < nti_area):
        return #Not enough packets for even a single NTI

    if(nti_packet_arrangement != 'linear' and nti_packet_arrangement not in count_based_arrangements):
        print("ERROR: Unsupported packet arrangement detected!")
        raise KeyboardInterrupt

    create_directory(output_dir)
    nti_array = create_empty_2d_array(nti_x,nti_y)

    cur_packet_list_index = 0
    while((len(packet_list)-cur_packet_list_index) >= nti_area): #Case: There are enough packets to generate a new NTI
        window_end_index = cur_packet_list_index+nti_area
        #First fill up the 2d array
        if(nti_packet_arrangement=='linear'):
            result = arrange_packets_as_linear_map_time_based(packet_list,nti_array,cur_packet_list_index,window_end_index,nti_time_range_ms)
        else:
            result = count_based_arrangements[nti_packet_arrangement](packet_list,nti_array,cur_packet_list_index,window_end_index)

        if(result==-1):
            return #Reached end of packet stream, No more time-relative NTIs to be written

        #Then Write the 2D array to file (that is the NTI)
        output_nti_file = os.path.join(output_dir, base_filename + '.nti-'+str(cur_packet_list_index) + '.nti')
        write_2d_array_to_file(nti_array,output_nti_file)

        #Zero out the nti_array for the next NTI to be generated
        nti_array.fill(0) #verified faster than declaring new np.zeros 2d-array

        #increment the current NTI index
        cur_packet_list_index += 1

        #debug cap (see max_ntis)
        if(max_ntis is not None and cur_packet_list_index == max_ntis):
            raise KeyboardInterrupt


In [18]:
# Driver: walk the capture tree, mirror its directory layout under capture_dst
# and generate NTIs for every '.cap' file found.
# KeyboardInterrupt is the "stop" signal used throughout this notebook (manual
# interrupt, debug caps, unsupported arrangement) and ends the run gracefully.
try:
    for dirpath, _subdir_names, filenames in os.walk(capture_src): #each os.walk element is: dirpath, subdir-names, dir-filenames
        # dirpath always begins with capture_src, so only that first occurrence
        # is swapped; a later occurrence inside a directory or file name is kept.
        outdir = dirpath.replace(capture_src,capture_dst,1)
        try:
            os.mkdir(outdir)
        except FileExistsError:
            pass #We don't care if the directory already exists
        for capture in filenames:
            if not capture.endswith(".cap"):
                continue
            base_file = capture[:-4] #file name without the '.cap' extension
            src_file = dirpath+os.sep+capture
            output_dir = (dirpath+os.sep+base_file).replace(capture_src,capture_dst,1)
            print(src_file)
            print(output_dir)
            print(base_file)
            # NOTE: keep the module-level name `packets` -- other cells may read it.
            packets = get_packets_in_file(src_file)

            #debug  to ensure that arrangement code is working as intended
            #for packet_idx in range (0,225):
            #    packets[packet_idx].ip_length = packet_idx
            #end debug for arrangement code testing

            #generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='linear')
            #generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='edge_spiral')
            #generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='center_spiral')
            #generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='waterfall')
            #generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='diagonal')
            generate_time_relative_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='linear')
            #raise KeyboardInterrupt

except KeyboardInterrupt:
    print("Execution Stopped due to KeyboardInterrupt")
            

captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.0.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.0
hon_raw_capture10.0
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.1.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.1
hon_raw_capture10.1
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.10.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.10
hon_raw_capture10.10
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.100.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.100
hon_raw_capture10.100
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.1000.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.1000
hon_raw_capture10.1000
captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.1001.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.1001
hon_ra

In [182]:
#Isolate the outputs into <src_name>.connectionNumber.cap
##Then plot the connections onto a single graph.
###The dominant connection in a packet capture is the one you keep