In [10]:
import matplotlib
import re
import os
import sys
import time
from datetime import datetime
import numpy as np

##CurrentStep: Figure out file management for NTIs

'''
Process:
    1) Ingest pre-processed packet stream
    2) rewrite packet stream into network traffic images using predefined patterns
'''

'\nProcess:\n    1) Ingest pre-processed packet stream\n    2) rewrite packet stream into network traffic images using predefined patterns\n'

In [11]:
capture_src = 'captures_split'
capture_dst = 'captures_networkTrafficImages'
packet_match_regex = r'(?P<packetTime>\d+-\d+-\d+\W\d+:\d+:\d+\.\d+)\WIP.*proto\W(?P<proto>\w+).*length\W(?P<ipLength>\d+).*\r?\n\W*(?P<srcHost>[\w\.\-]+)\.(?P<srcPort>[\w]+)\W\>\W(?P<dstHost>[\w\.\-]+)\.(?P<dstPort>[\w]+).*length\W(?P<tcpLength>\d+).*'
time_format_string = '%Y-%m-%d %H:%M:%S.%f' #Predefined Time format string used in tcpdump captures

In [12]:
def get_connection_string(packet):
    return "%s:%s,%s:%s,%s"%(packet.src_host,packet.src_port,packet.dst_host,packet.dst_port,packet.proto)
def get_connection_string_reversed(packet):
    return "%s:%s,%s:%s,%s"%(packet.dst_host,packet.dst_port,packet.src_host,packet.src_port,packet.proto)
def time_string_to_datetime(time_string):
    return datetime.strptime(time_string,time_format_string)
def datetime_to_timestring(datetime_time):
    return datetime_time.strftime(time_format_string)

In [13]:
class packet:
    packet_string = ""
    src_host = ""
    src_port = ""
    dst_host = ""
    dst_port = ""
    proto = ""
    ip_length = 0
    tcp_length = 0
    packet_time = None
    def __init__(self,src_host,src_port,dst_host,dst_port,proto,packet_string,ip_length,tcp_length,packet_time_str):
        self.src_host = src_host
        self.src_port = src_port
        self.dst_host = dst_host
        self.dst_port = dst_port
        self.proto = proto
        self.packet_string = packet_string
        self.ip_length = int(ip_length)
        self.tcp_length = int(tcp_length)
        self.packet_time = datetime.strptime(packet_time_str,time_format_string)
    def to_string(self):
        print("srcHost:%s\nsrcPort:%s\ndstHost:%s\ndstPort:%s\nproto:%s\n" % (self.src_host,self.src_port,self.dst_host,self.dst_port,self.proto))

In [14]:
def get_packets_in_file(capture_file):
    line_one = ''
    line_two = ''
    packets = list()
    with open(capture_file,'r') as src:
        for line in src:
            line_one = line_two
            line_two = line
            line_pair = line_one + line_two
            
            packet_match = re.match(packet_match_regex,line_pair,re.MULTILINE)
            if(packet_match):
                cur_packet = packet(packet_match.group('srcHost'),packet_match.group('srcPort'),packet_match.group('dstHost'),packet_match.group('dstPort'),packet_match.group('proto'),line_pair,packet_match.group('ipLength'),packet_match.group('tcpLength'),packet_match.group('packetTime'))
                packets.append(cur_packet)
    return packets

In [44]:
def create_directory(directory):
    try:
        os.mkdir(directory)
    except FileExistsError:
        pass #We don't care if the directory already exists       
def create_empty_2d_array(array_x,array_y):
    return np.zeros((array_x,array_y),dtype=int) #np arrays are n_rows x n_columns
def write_2d_array_to_file(array_2d,output_file):
    with open(output_file,'w') as nti_file:
        for row in range (0,len(array_2d)):
            for column in range (0,len(array_2d[row])):
                nti_file.write(str(array_2d[row][column]))
                if(column != len(array_2d[row])-1): #Add spaces between elements exceot for last element
                    nti_file.write(" ")
            if(row != len(array_2d)-1): #Add new lines between rows except for last row
                nti_file.write('\n')
def arrange_packets_as_linear_map(packets,array_2d,packet_start_index,packet_end_index):
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    #for packet_idx in range (packet_start_index,packet_end_index):
    for row in range (0,x_dim):
        for col in range (0,y_dim):
            array_2d[row][col] = packets[packet_start_index+col+(row*x_dim)].ip_length #ip_packet_length is our targeted feature
def arrange_packets_as_center_spiral(packets,array_2d,packet_start_index,packet_end_index):
    
    #Get Center Array Position
    ##Each distance/layer from the center is number of positions to fill
    #X X X X X
    #X 7 8 9 X
    #9 6 1 2 1
    #8 5 4 3 2
    #7 6 5 4 3

    
    #X X K X X
    #X 7 K 9 X
    #K K C K K
    #8 5 K 3 2
    #7 6 K 4 3
    
    #X X X X
    #X C X X
    #X X X X
    #X X X X
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    #Center Spiral Must be An Odd Size!
    if(x_dim % 2 == 0 or y_dim % 2 == 0):
        print("ERROR: Spiral 2D Array Dimensions must be ODD! Not Even!")
        return
    center_x = x_dim // 2
    center_y = y_dim // 2
    distance_from_center = 0
    keypoint_1_x = center_x + distance_from_center
    keypoint_1_y = center_y
    keypoint_2_x = center_x
    keypoint_2_y = center_y + distance_from_center
    keypoint_3_x = center_x - distance_from_center
    keypoint_3_y = center_y
    keypoint_4_x = center_x
    keypoint_4_y = center_y - distance_from_center
    
    for packet_idx in range(packet_start_index,packet_end_index):
        pass
    pass #TODO: Arrange packets as a center spiral in 2d array.
def arrange_packets_as_edge_spiral(packets,array_2d,packet_start_index,packet_end_index):
    x_dim = len(array_2d)
    y_dim = len(array_2d[0])
    if(x_dim % 2 == 0 or y_dim % 2 == 0 or x_dim != y_dim): #Dimension sanity check
        print("ERROR: Spiral 2D Array Dimensions must be ODD and have equal x and y dimensions!")
        return
    row_current = 0
    col_current = 0
    direction = 0 #0==col_right,1==row_down,2==col_left,3==row_up
    packet_index_offset=0
    center_to_edge_distance=x_dim//2
    for spirals in range (0,center_to_edge_distance):
        for orientation in range (0,4): #4 directions (IE 4-sides)
            for nElements in range (0,y_dim-spirals-1):
                array_2d[row_current][col_current]=packets[packet_start_index+packet_index_offset].ip_length
                print(packets[packet_start_index+packet_index_offset].ip_length)
                packet_index_offset += 1
                if(direction==0):
                    col_current+=1
                elif(direction==1):
                    row_current+=1
                elif(direction==2):
                    col_current-=1
                elif(direction==3):
                    row_current-=1
            #change directions
            direction = (direction+1) % 4
        #Spiral Level Complete, Increment level of spiral by 1
        row_current+=1
        col_current+=1
        
def arrange_packets_as_waterfall(packets,array_2d,packet_start_index,packet_end_index):
    pass #TODO: Arrange packets as waterfall in 2d array.
def generate_ntis(packet_list,nti_x,nti_y,output_dir,base_filename,nti_packet_arrangement):
    #nti_packet_arrangement can be 'linear','edge_spiral','center_spiral','waterfall'
    nti_area = nti_x * nti_y
    cur_packet_list_index = 0
    
    if(len(packet_list)>nti_area):
        create_directory(output_dir)
    
    nti_array = create_empty_2d_array(nti_x,nti_y)
    
    while((len(packet_list)-cur_packet_list_index) > nti_area): #Case: There are enough packets to generate a new NTI
        #First fill up the 2d array
        if(nti_packet_arrangement=='linear'):
            arrange_packets_as_linear_map(packets,nti_array,cur_packet_list_index,cur_packet_list_index+nti_area)
        elif(nti_packet_arrangement=='edge_spiral'):
            arrange_packets_as_edge_spiral(packets,nti_array,cur_packet_list_index,cur_packet_list_index+nti_area)
        elif(nti_packet_arrangement=='center_spiral'):
            arrange_packets_as_center_spiral(packets,nti_array,cur_packet_list_index,cur_packet_list_index+nti_area)
        elif(nti_packet_arrangement=='waterfall'):
            arrange_packets_as_waterfall(packets,nti_array,cur_packet_list_index,cur_packet_list_index+nti_area)

        
        #Then Write the 2D array to file (that is the NTI)
        output_nti_file = output_dir + os.sep + base_filename + '.nti-'+str(cur_packet_list_index) + '.nti'
        write_2d_array_to_file(nti_array,output_nti_file)
        
        #Zero out the nti_array for the next NTI to be generated
        nti_array.fill(0) #verified faster than declaring new np.zeros 2d-array
        
        #increment the current NTI index
        cur_packet_list_index += 1
        
        #debug
        if(cur_packet_list_index == 300):
            raise KeyboardInterrupt


In [45]:
try:
    for x in os.walk(capture_src): #each os.walk element is: dirpath, subdir-names, dir-filenames
        outdir = x[0].replace(capture_src,capture_dst)
        try:
            os.mkdir(outdir)
        except FileExistsError:
            pass #We don't care if the directory already exists
        for capture in x[2]:
            if capture.endswith(".cap"):
                base_file = capture[:-4]
                date_file = base_file + '.date'
                src_file = x[0]+os.sep+capture
                output_dir = (x[0]+os.sep+base_file).replace(capture_src,capture_dst)
                print(src_file)
                print(output_dir)
                print(base_file)
                packets = get_packets_in_file(src_file)
                
                #debug  to ensure that arrangement code is working as intended
                for packet_idx in range (0,225):
                    packets[packet_idx].ip_length = packet_idx
                #end debug for arrangement code testing
                
                #generate_ntis(packets,16,16,output_dir,base_file,nti_packet_arrangement='linear')
                generate_ntis(packets,5,5,output_dir,base_file,nti_packet_arrangement='edge_spiral')
                #raise KeyboardInterrupt
                        
except KeyboardInterrupt:
    print("Execution Stopped due to KeyboardInterrupt")
    pass
            

captures_split\games\hon\hon_raw_capture10\hon_raw_capture10.0.cap
captures_networkTrafficImages\games\hon\hon_raw_capture10\hon_raw_capture10.0
hon_raw_capture10.0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

IndexError: list index out of range

In [53]:
#Isolate the outputs into <src_name>.connectionNumber.cap
##Then plot the connections onto a single graph.
###The dominant connection in a packet capture is the one you keep