In [1]:
import dpkt

# Reassemble HTTP

In [24]:
class Packet:
    '''Encapsulate the TCP header fields of one packet read from a pcap file.

    All fields are decoded from fixed byte offsets of the raw frame: the TCP
    header is taken to start at byte 34.  NOTE(review): that offset matches a
    14-byte Ethernet header plus a 20-byte IPv4 header without options --
    presumably the layout of the captures this was written for; frames with a
    different layout are not detected.

    Attributes:
        time_stamp (float): capture time stamp (first element of the pcap record)
        byte_info (bytes):  raw bytes of the frame (second element of the pcap record)
        size (int):         the size of the whole packet, including data and all headers
        source_port (int):  source port number
        dest_port (int):    destination port number
        sequence_num (int): sequence number
        ack_num (int):      acknowledgement number
        head_len (int):     TCP header length in bytes
        urg (int):          urgent flag
        ack (int):          acknowledgement flag
        psh (int):          psh flag
        rst (int):          reset flag
        syn (int):          synchronize flag
        fin (int):          finish flag
        receive_win (int):  receive window
        checksum (int):     checksum
        urgent (int):       urgent data pointer
        scale (int):        window scaling size (only set by parse_window_scale)
        payload (bytes):    TCP payload
        payload_len (int):  TCP payload length

    Everything from source_port downwards exists only after parse_byte_info()
    (or parse_window_scale() for scale) has been called.
    '''

    def __init__(self, packet):
        '''Init a packet

        Args:
            packet(tuple): an element from dpkt.pcap.Reader.readpkts(),
                           i.e. a (time stamp, raw bytes) pair
        '''
        self.time_stamp = packet[0]
        self.byte_info  = packet[1]
        self.size = len(packet[1])


    def _uint(self, start, stop):
        '''Decode byte_info[start:stop] as a big-endian unsigned integer.

        Slicing (rather than indexing) means a truncated frame yields 0
        instead of raising IndexError.
        '''
        return int.from_bytes(self.byte_info[start:stop], byteorder='big')


    def parse_byte_info(self):
        '''Convert the byte format information of a packet into human readable fields
        '''
        self.source_port  = self._uint(34, 36)
        self.dest_port    = self._uint(36, 38)
        self.sequence_num = self._uint(38, 42)
        self.ack_num      = self._uint(42, 46)
        # The upper nibble of byte 46 is the data offset in 32-bit words.
        self.head_len     = 4 * (self._uint(46, 47) >> 4)
        # Byte 47 holds the flag bits, FIN in the least significant position.
        flags = self._uint(47, 48)
        self.fin = flags & 1
        self.syn = (flags >> 1) & 1
        self.rst = (flags >> 2) & 1
        self.psh = (flags >> 3) & 1
        self.ack = (flags >> 4) & 1
        self.urg = (flags >> 5) & 1
        self.receive_win = self._uint(48, 50)
        self.checksum    = self._uint(50, 52)
        self.urgent      = self._uint(52, 54)
        # The payload starts right after this packet's own TCP header.  This
        # must use self.head_len: relying on a module-level variable that
        # happens to be called `packet` breaks as soon as the caller names its
        # variable differently.
        self.payload     = self.byte_info[34 + self.head_len:]
        self.payload_len = len(self.payload)


    def parse_window_scale(self):
        '''Read byte 73 of the frame as a shift count and store 2**shift in scale.

        shift window size is typically 14. so the scaling is 2^14 = 16384.
        NOTE(review): offset 73 is presumably where the window-scale option
        sits in the SYN packets of the analysed captures -- confirm before
        using this on other traces.
        '''
        shift = self._uint(73, 74)
        self.scale = 1 << shift


    def __str__(self):
        '''Return a multi-line, human readable dump of the parsed header fields.'''
        lines = [
            'Source Port #  = {}'.format(self.source_port),
            'Dest Port #    = {}'.format(self.dest_port),
            'Sequence #     = {}'.format(self.sequence_num),
            'Ackownledge #  = {}'.format(self.ack_num),
            'Header length  = {}'.format(self.head_len),
            'URG({}) ACK({}) PSH({})'.format(self.urg, self.ack, self.psh),
            'RST({}) SYN({}) FIN({})'.format(self.rst, self.syn, self.fin),
            'Receive window = {}'.format(self.receive_win),
            'Checksum       = {}'.format(self.checksum),
            'Urgent         = {}'.format(self.urgent),
            'Payload len    = {}'.format(self.payload_len),
        ]
        return '\n'.join(lines) + '\n'

In [126]:
class Flow:
    '''Encapsulate a flow of packets from one port of sender to another port of receiver

    Attributes:
        __ID  (int):  private class member handing out the next identification
        ID    (int):  identification of a flow
        port1 (int):  source port of the packet given to set_port (-1 until set)
        port2 (int):  destination port of the packet given to set_port (-1 until set)
        flow  (list): a list of Packet, in the order they were added
        throughput_emp (float): empirical throughput in Mbps (-1 until computed)
        rtt (float): estimated round trip time in seconds (-1 until estimated)
        counter (int): count the number of packets in this flow
        scale (int):   window scaling size (initialised to 1)
    '''
    __ID = 100

    def __init__(self):
        self.ID    = Flow.__ID
        Flow.__ID += 1
        self.port1 = -1
        self.port2 = -1
        self.flow  = []
        self.throughput_emp = -1
        self.rtt     = -1
        self.counter = 0
        self.scale   = 1
        print('init a new flow {}'.format(self.ID))


    def __str__(self):
        '''Return a one-line summary: ID, both ports and the packet count.'''
        return 'ID={}  port1={}  port2={}  # of packets={}'.format(self.ID, self.port1, self.port2, self.counter)


    def set_port(self, packet):
        '''Record the two ports of this flow from a packet's source/destination ports.'''
        self.port1 = packet.source_port
        self.port2 = packet.dest_port


    def get_packet(self, index):
        '''Return the packet at a non-negative index, or None when out of range.'''
        if 0 <= index < len(self.flow):
            return self.flow[index]
        return None


    def add_packet(self, packet):
        '''Append a packet to the flow and bump the packet counter.'''
        self.flow.append(packet)
        self.counter += 1


    def compute_throughput(self):
        '''Compute the empirical throughput of the flow in Mbps.

        Every byte of every packet (data and all headers, both directions)
        is counted and divided by the time between the first and the last
        packet.  The result is stored in throughput_emp and printed.

        When the flow is empty or spans no time (e.g. a single packet) the
        throughput is undefined: throughput_emp keeps its previous value
        and a notice is printed instead of raising.
        '''
        print('***Flow {}***'.format(self.ID))
        if not self.flow:
            print('Throughput is undefined: the flow has no packets\n')
            return

        total_data = sum(packet.size for packet in self.flow)
        elapse = self.flow[-1].time_stamp - self.flow[0].time_stamp
        if elapse <= 0:
            print('Throughput is undefined: the flow spans no time\n')
            return

        self.throughput_emp = (total_data*8.0)/(elapse*1000000)
        print('Throughput is {0:1.5f} Mbps\n'.format(self.throughput_emp))


    def estimateRTT(self, payload=1448):
        '''Estimate the average RTT of the flow and store it in rtt.

        A sample is the time between a packet sent from port1 with sequence
        number S and the last packet from the other side acknowledging
        S + payload.  Sequence numbers that were sent more than once are
        ignored so retransmissions do not distort the estimate.

        Args:
            payload (int): payload size assumed for every data segment; the
                           default of 1448 bytes is the maximum payload of
                           a TCP segment in the analysed captures.

        When no sample can be formed, rtt keeps its previous value and a
        notice is printed instead of dividing by zero.
        '''
        first_sent = {}        # seq --> first packet sent from port1 with that seq
        retransmitted = set()  # seq numbers that were seen more than once
        last_ack = {}          # ack --> last packet from the other side with that ack
        for packet in self.flow:
            if packet.source_port == self.port1:   # sender --> receiver
                seq = packet.sequence_num
                if seq in first_sent:
                    retransmitted.add(seq)
                else:
                    first_sent[seq] = packet
            else:                                   # receiver --> sender
                last_ack[packet.ack_num] = packet

        samples = []
        for ack, ack_packet in last_ack.items():
            seq = ack - payload
            if seq in retransmitted:    # retransmissions are not considered
                continue
            seq_packet = first_sent.get(seq)
            if seq_packet is not None:
                samples.append(ack_packet.time_stamp - seq_packet.time_stamp)

        print('***Flow {}***'.format(self.ID))
        if not samples:
            print('RTT cannot be estimated: no data packet could be matched with an acknowledgement')
            return
        self.rtt = sum(samples)/len(samples)
        print('Estimated RTT is {0:1.5f} second'.format(self.rtt))


    def _follow_segments(self, start_seq, packet_dict):
        '''Walk consecutive segments starting at start_seq.

        Args:
            start_seq (int):    sequence number of the first segment
            packet_dict (dict): { sequence number : packet }

        Return:
            (list): the packets found, in sequence order.  The walk stops
                    when no packet carries the next sequence number, when
                    the next packet has its FIN flag set (that packet is
                    not included), or when a packet has no payload (the
                    sequence number could not advance any further).
        '''
        segments = []
        next_seq = start_seq
        next_packet = packet_dict.get(next_seq)
        while next_packet is not None:
            segments.append(next_packet)
            payload_len = next_packet.payload_len
            if payload_len == 0:
                # An empty segment does not advance the sequence number;
                # continuing would fetch the very same packet forever.
                break
            # Sequence numbers are 32-bit counters and wrap around.
            next_seq = (next_seq + payload_len) & 0xFFFFFFFF
            next_packet = packet_dict.get(next_seq)
            if next_packet is not None and next_packet.fin == 1:
                break
        return segments


    def reassemble_http(self):
        '''Reassemble each HTTP request/response of the flow and print it.

        For every packet whose payload contains 'GET', the segments that
        start at the sequence number equal to the GET packet's ack number
        are collected, and the request is printed together with the
        <source, dest, seq, ack> tuple of each collected segment.  This
        only works for unencrypted captures.
        '''
        # NOTE(review): 'GET' is searched anywhere in the payload, so binary
        # data that happens to contain these three bytes would also match.
        get_packets = [packet for packet in self.flow if 'GET' in str(packet.payload)]

        # Later packets with the same seq number replace earlier ones, which is okay.
        packet_dict = {packet.sequence_num: packet for packet in self.flow}

        reassembles = []
        for get in get_packets:
            reassemble = ReassembleHTTP(get)
            # The response starts at the ack number of the GET request.
            for segment in self._follow_segments(get.ack_num, packet_dict):
                reassemble.add_tcp_segment(segment)
            reassembles.append(reassemble)

        for reassemble in reassembles:
            reassemble.print_reassembleHTTP()

In [130]:
class ReassembleHTTP:
    '''Collect the TCP segments that belong to one HTTP GET request.

    Attributes:
        request (str): the part of the textual form of the GET packet's
                       payload that describes the request
        tcp_segment (list): (source port, dest port, seq, ack) tuples of the
                            segments added so far, in insertion order
    '''
    def __init__(self, get_packet):
        '''Initialise from the single packet that carries the GET request.'''
        text = str(get_packet.payload)
        begin = text.find('GET')
        # Cut at whichever of the two markers sits further to the right.
        stop = max(text.find('HTTP'), text.find('Connection'))
        self.request = text[begin:stop]
        self.tcp_segment = []

    def add_tcp_segment(self, packet):
        '''Remember the <source, dest, seq, ack> tuple of one more segment.'''
        entry = (
            packet.source_port,
            packet.dest_port,
            packet.sequence_num,
            packet.ack_num,
        )
        self.tcp_segment.append(entry)


    def print_reassembleHTTP(self):
        '''Print the request followed by one line per collected segment.'''
        print(self.request)
        print('The TCP segments are below:')
        for entry in self.tcp_segment:
            print(entry)
            

In [135]:
class FlowManager:
    '''Keep a collection of flows and route packets to the flow they belong to.

    Attributes:
        flow_list (list): every registered Flow, in registration order
        flow_info (dict): lookup table { ID : (index in flow_list, port1, port2) }
    '''

    def __init__(self):
        self.flow_list = []
        self.flow_info = {}


    def add_packet(self, packet):
        '''Hand a packet to its flow, creating and registering a new flow
           when no registered flow matches the packet's ports.

        Args:
            packet (Packet)
        '''
        position = self.where_is_packet(packet)
        if position != -1:
            # The packet belongs to an already registered flow.
            self.flow_list[position].add_packet(packet)
            return

        # First packet seen for this port pair: open a flow for it.
        fresh = Flow()
        fresh.set_port(packet)
        fresh.add_packet(packet)
        self.add_flow(fresh)


    def add_flow(self, flow):
        '''Register a flow and index it by its ID.

        Args:
            flow (Flow): a new flow to be added to the flow manager
        '''
        self.flow_list.append(flow)
        position = len(self.flow_list) - 1
        flow_id = flow.ID
        first = flow.port1
        second = flow.port2
        self.flow_info[flow_id] = (position, first, second)


    def where_is_packet(self, packet):
        '''Find the registered flow whose port pair matches a packet,
           regardless of direction.

        Args:
            packet (Packet): a packet

        Return:
            (int): index of the flow in flow_list, or -1 when none matches
        '''
        ports = (packet.source_port, packet.dest_port)
        for position, first, second in self.flow_info.values():
            if ports in ((first, second), (second, first)):
                return position
        return -1


    def size(self):
        '''Return the number of registered flows.'''
        return len(self.flow_list)


    def get_flow(self, ID):
        '''Get a flow according to its ID

        Args:
            ID (int): identification number of the flow

        Return:
            (Flow): the matching flow, or None when the ID is unknown
        '''
        entry = self.flow_info.get(ID)
        if not entry:
            return None
        return self.flow_list[entry[0]]


    def partC_1(self):
        '''Print the reassembled HTTP exchanges of every flow, each followed by blank lines.'''
        for current in self.flow_list:
            current.reassemble_http()
            print('\n\n')

In [139]:
# Read every captured record up front. The context manager closes the capture
# file as soon as the list of (time stamp, raw bytes) records has been built,
# instead of leaving the handle open for the lifetime of the kernel.
with open('http_1080.pcap', 'rb') as f:
    pcap = dpkt.pcap.Reader(f)
    packets_bytes = pcap.readpkts()

flow_manager = FlowManager()

# Parse each record and let the manager sort it into the matching flow.
packets = []
for packet_bytes in packets_bytes:
    packet = Packet(packet_bytes)
    packet.parse_byte_info()
    flow_manager.add_packet(packet)
    packets.append(packet)
    

init a new flow 168
init a new flow 169
init a new flow 170
init a new flow 171
init a new flow 172
init a new flow 173
init a new flow 174
init a new flow 175
init a new flow 176
init a new flow 177
init a new flow 178
init a new flow 179
init a new flow 180
init a new flow 181
init a new flow 182
init a new flow 183
init a new flow 184


In [140]:
# Print the reassembled HTTP requests of every flow held by the manager;
# each flow's output is followed by blank lines.
flow_manager.partC_1()

GET / HTTP/1.1\r\nHost: www.sbunetsyslabs.com:1080\r\n
The TCP segments are below:
(1080, 51948, 1685743982, 3241043857)



GET /img/apple.jpeg HTTP/1.1\r\nHost: www.sbunetsyslabs.com:1080\r\n
The TCP segments are below:
(1080, 51949, 2357262570, 2297690016)
(1080, 51949, 2357263956, 2297690016)
(1080, 51949, 2357265342, 2297690016)
(1080, 51949, 2357266728, 2297690016)



GET /js/hello.js HTTP/1.1\r\nHost: www.sbunetsyslabs.com:1080\r\n
The TCP segments are below:
(1080, 51950, 1041205408, 1462437502)



GET /img/orange.jpeg HTTP/1.1\r\nHost: www.sbunetsyslabs.com:1080\r\n
The TCP segments are below:
(1080, 51951, 2566683428, 4163756188)
(1080, 51951, 2566684814, 4163756188)
(1080, 51951, 2566686200, 4163756188)
(1080, 51951, 2566687586, 4163756188)
(1080, 51951, 2566688972, 4163756188)
(1080, 51951, 2566690358, 4163756188)



GET /img/banana.jpeg HTTP/1.1\r\nHost: www.sbunetsyslabs.com:1080\r\n
The TCP segments are below:
(1080, 51952, 2059513040, 2756255158)
(1080, 51952, 2059514426

(1080, 51961, 1913684930, 1902937388)
(1080, 51961, 1913686316, 1902937388)
(1080, 51961, 1913687702, 1902937388)
(1080, 51961, 1913689088, 1902937388)
(1080, 51961, 1913690474, 1902937388)
(1080, 51961, 1913691860, 1902937388)
(1080, 51961, 1913693246, 1902937388)
(1080, 51961, 1913694632, 1902937388)
(1080, 51961, 1913696018, 1902937388)
(1080, 51961, 1913697404, 1902937388)
(1080, 51961, 1913698790, 1902937388)
(1080, 51961, 1913700176, 1902937388)
(1080, 51961, 1913701562, 1902937388)
(1080, 51961, 1913702948, 1902937388)
(1080, 51961, 1913704334, 1902937388)
(1080, 51961, 1913705720, 1902937388)
(1080, 51961, 1913707106, 1902937388)
(1080, 51961, 1913708492, 1902937388)
(1080, 51961, 1913709878, 1902937388)
(1080, 51961, 1913711264, 1902937388)
(1080, 51961, 1913712650, 1902937388)
(1080, 51961, 1913714036, 1902937388)
(1080, 51961, 1913715422, 1902937388)
(1080, 51961, 1913716808, 1902937388)
(1080, 51961, 1913718194, 1902937388)
(1080, 51961, 1913719580, 1902937388)
(1080, 51961