Skip to content

Commit

Permalink
Merge changes from Matt Welsh and Ethan Katz-Basset
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrew Fleenor committed Jul 13, 2012
2 parents 007254b + 7a6d661 commit 2864de0
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 13 deletions.
19 changes: 15 additions & 4 deletions dpkt_http_replacement.py
Expand Up @@ -7,6 +7,8 @@


import cStringIO import cStringIO
import dpkt import dpkt
import logging
import settings


def parse_headers(f): def parse_headers(f):
"""Return dict of HTTP headers parsed from a file object.""" """Return dict of HTTP headers parsed from a file object."""
Expand Down Expand Up @@ -49,14 +51,23 @@ def parse_body(f, version, headers):
l.append(buf) l.append(buf)
else: else:
break break
if not found_end: if settings.strict_http_parse_body and not found_end:
raise dpkt.NeedData('premature end of chunked body') raise dpkt.NeedData('premature end of chunked body')
body = ''.join(l) body = ''.join(l)
elif 'content-length' in headers: elif 'content-length' in headers:
n = int(headers['content-length']) # Ethan K B: Have observed malformed 0,0 content lengths
try:
n = int(headers['content-length'])
except ValueError:
logging.warn('HTTP content-length "%s" is invalid, assuming 0' %
headers['content-length'])
n = 0
body = f.read(n) body = f.read(n)
if len(body) != n: if len(body) != n:
raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body))) logging.warn('HTTP content-length mismatch: expected %d, got %d', n,
len(body))
if settings.strict_http_parse_body:
raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body)))
else: else:
# XXX - need to handle HTTP/0.9 # XXX - need to handle HTTP/0.9
# BTW, this function is not called if status code is 204 or 304 # BTW, this function is not called if status code is 204 or 304
Expand Down Expand Up @@ -167,7 +178,7 @@ def unpack(self, buf):
f = cStringIO.StringIO(buf) f = cStringIO.StringIO(buf)
line = f.readline() line = f.readline()
l = line.strip().split(None, 2) l = line.strip().split(None, 2)
if len(l) < 2 or not l[0].startswith(self.__proto) or not l[1].isdigit(): if len(l) < 3 or not l[0].startswith(self.__proto) or not l[1].isdigit():
raise dpkt.UnpackError('invalid response: %r' % line) raise dpkt.UnpackError('invalid response: %r' % line)
self.version = l[0][len(self.__proto)+1:] self.version = l[0][len(self.__proto)+1:]
self.status = l[1] self.status = l[1]
Expand Down
2 changes: 1 addition & 1 deletion http/flow.py
Expand Up @@ -54,7 +54,7 @@ def __init__(self, tcpflow):
except LookupError: except LookupError:
# there were no responses after the first request # there were no responses after the first request
# there's nothing we can do # there's nothing we can do
logging.warning("Request has no reponse.") logging.warning("Request has no response.")


class MessagePair: class MessagePair:
''' '''
Expand Down
3 changes: 3 additions & 0 deletions main.py
Expand Up @@ -27,11 +27,14 @@
dest='drop_bodies', default=False) dest='drop_bodies', default=False)
parser.add_option('-r', '--resource-usage', action='store_true', parser.add_option('-r', '--resource-usage', action='store_true',
dest='resource_usage', default=False) dest='resource_usage', default=False)
parser.add_option('--pad_missing_tcp_data', action='store_true',
dest='pad_missing_tcp_data', default=False)
options, args = parser.parse_args() options, args = parser.parse_args()


# copy options to settings module # copy options to settings module
settings.process_pages = options.pages settings.process_pages = options.pages
settings.drop_bodies = options.drop_bodies settings.drop_bodies = options.drop_bodies
settings.pad_missing_tcp_data = options.pad_missing_tcp_data


# setup logs # setup logs
logging.basicConfig(filename='pcap2har.log', level=logging.INFO) logging.basicConfig(filename='pcap2har.log', level=logging.INFO)
Expand Down
8 changes: 7 additions & 1 deletion mediatype.py
Expand Up @@ -15,7 +15,7 @@ class MediaType(object):
# of string as the types above, values are pretty much anything but another # of string as the types above, values are pretty much anything but another
# semicolon # semicolon
mediatype_re = re.compile( mediatype_re = re.compile(
r'^([\w\-+.]+)/([\w\-+.]+)((?:\s*;\s*[\w\-]+=[^;]+)*)\s*$' r'^([\w\-+.]+)/([\w\-+.]+)((?:\s*;\s*[\w\-]+=[^;]+)*);?\s*$'
) )
# RE for parsing name-value pairs # RE for parsing name-value pairs
nvpair_re = re.compile(r'^\s*([\w\-]+)=([^;\s]+)\s*$') nvpair_re = re.compile(r'^\s*([\w\-]+)=([^;\s]+)\s*$')
Expand All @@ -25,6 +25,12 @@ def __init__(self, data):
Args: Args:
data = string, the media type string data = string, the media type string
''' '''
if not data:
log.warning('Setting empty media type to x-unknown-content-type')
self.type = 'application'
self.subtype = 'x-unknown-content-type'
params = {}
return
match = self.mediatype_re.match(data) match = self.mediatype_re.match(data)
if match: if match:
# get type/subtype # get type/subtype
Expand Down
2 changes: 1 addition & 1 deletion pcap.py
Expand Up @@ -66,5 +66,5 @@ def ParsePcap(dispatcher, filename=None, reader=None):
except dpkt.dpkt.NeedData as error: except dpkt.dpkt.NeedData as error:
log.warning(error) log.warning(error)
log.warning('A packet in the pcap file was too short, ' log.warning('A packet in the pcap file was too short, '
'debug_pkt_count=%d' % debug_pkt_count) 'packet_count=%d' % packet_count)
errors.append((None, error)) errors.append((None, error))
19 changes: 16 additions & 3 deletions pcaputil.py
Expand Up @@ -3,7 +3,12 @@
''' '''


import dpkt import dpkt
from socket import inet_ntoa
# use inet_ntoa to process IPs, if available (it's not on AppEngine)
try:
from socket import inet_ntoa
except ImportError:
inet_ntoa = lambda ip: ip


def friendly_tcp_flags(flags): def friendly_tcp_flags(flags):
''' '''
Expand Down Expand Up @@ -61,8 +66,16 @@ class ModifiedReader(object):
''' '''


def __init__(self, fileobj): def __init__(self, fileobj):
self.name = fileobj.name if hasattr(fileobj, 'name'):
self.fd = fileobj.fileno() self.name = fileobj.name
else:
self.name = '<unknown>'

if hasattr(fileobj, 'fileno'):
self.fd = fileobj.fileno()
else:
self.fd = None

self.__f = fileobj self.__f = fileobj
buf = self.__f.read(dpkt.pcap.FileHdr.__hdr_len__) buf = self.__f.read(dpkt.pcap.FileHdr.__hdr_len__)
self.__fh = dpkt.pcap.FileHdr(buf) self.__fh = dpkt.pcap.FileHdr(buf)
Expand Down
7 changes: 7 additions & 0 deletions settings.py
@@ -1,2 +1,9 @@
process_pages = True process_pages = True
drop_bodies = False # bodies of http responses drop_bodies = False # bodies of http responses

# Whether HTTP parsing should care whether the content length matches the
# content-length header.
strict_http_parse_body = True

# Whether to pad missing data in TCP flows with 0 bytes
pad_missing_tcp_data = True
Binary file added tcp/.packet.py.swp
Binary file not shown.
23 changes: 23 additions & 0 deletions tcp/direction.py
Expand Up @@ -2,6 +2,8 @@
import chunk as tcp import chunk as tcp
from operator import itemgetter, attrgetter from operator import itemgetter, attrgetter
import logging as log import logging as log
import settings
import packet


class Direction: class Direction:
''' '''
Expand Down Expand Up @@ -33,6 +35,7 @@ def __init__(self, flow):
self.final_arrival_pointer = None self.final_arrival_pointer = None
self.chunks = SortedCollection(key=attrgetter('seq_start')) self.chunks = SortedCollection(key=attrgetter('seq_start'))
self.final_data_chunk = None self.final_data_chunk = None

def add(self, pkt): def add(self, pkt):
''' '''
Merge the packet into the first chunk it overlaps with. If data was Merge the packet into the first chunk it overlaps with. If data was
Expand Down Expand Up @@ -137,6 +140,8 @@ def finish(self):
that self.data can be decided upon. Also calculates final_arrival for that self.data can be decided upon. Also calculates final_arrival for
any packets that arrived while seq_start was None any packets that arrived while seq_start was None
''' '''
if settings.pad_missing_tcp_data:
self.pad_missing_data()
self.finished = True self.finished = True
# calculate final_arrival # calculate final_arrival
if not self.final_arrival_data: if not self.final_arrival_data:
Expand All @@ -145,8 +150,10 @@ def finish(self):
if vertex[1].ts > peak_time: if vertex[1].ts > peak_time:
peak_time = vertex[1].ts peak_time = vertex[1].ts
self.final_arrival_data.insert((vertex[0], vertex[1].ts)) self.final_arrival_data.insert((vertex[0], vertex[1].ts))

if self.chunks and not self.final_data_chunk: if self.chunks and not self.final_data_chunk:
self.final_data_chunk = self.chunks[0] self.final_data_chunk = self.chunks[0]

def new_chunk(self, pkt): def new_chunk(self, pkt):
''' '''
creates a new tcp.Chunk for the pkt to live in. Only called if an creates a new tcp.Chunk for the pkt to live in. Only called if an
Expand Down Expand Up @@ -195,3 +202,19 @@ def seq_final_arrival(self, seq_num):
return self.final_arrival_data.find_le(seq_num)[1] return self.final_arrival_data.find_le(seq_num)[1]
except: except:
return None return None

def pad_missing_data(self):
    '''
    Fill the holes between consecutive chunks with zero bytes.

    Walks self.chunks pairwise. Whenever a chunk starts after the previous
    chunk ends, a packet.PadPacket covering exactly that hole is built and
    handed to self.add(). Does nothing when there are no chunks.
    '''
    if self.chunks:
        earlier = self.chunks[0]
        for later in self.chunks[1:]:
            missing = later.seq_start - earlier.seq_end
            if missing > 0:
                log.info('Padding %d missing bytes at %d',
                         missing, earlier.seq_end)
                # The padding is stamped with the timestamp of whatever
                # seq_arrival() reports for the first sequence number of
                # the later chunk (presumably the packet that delivered
                # it -- confirm against seq_arrival).
                arrival_ts = self.seq_arrival(later.seq_start).ts
                self.add(packet.PadPacket(earlier.seq_end, missing,
                                          arrival_ts))
            earlier = later
1 change: 1 addition & 0 deletions tcp/flow.py
Expand Up @@ -91,6 +91,7 @@ def finish(self):
self.flush_packets() self.flush_packets()
self.fwd.finish() self.fwd.finish()
self.rev.finish() self.rev.finish()

def samedir(self, pkt): def samedir(self, pkt):
''' '''
returns whether the passed packet is in the same direction as the returns whether the passed packet is in the same direction as the
Expand Down
29 changes: 26 additions & 3 deletions tcp/packet.py
Expand Up @@ -5,7 +5,7 @@ class Packet(object):
''' '''
Represents a TCP packet. Copied from pyper, with additions. contains Represents a TCP packet. Copied from pyper, with additions. contains
socket, timestamp, and data socket, timestamp, and data
Members: Members:
ts = dpkt timestamp ts = dpkt timestamp
buf = original data from which eth was constructed buf = original data from which eth was constructed
Expand Down Expand Up @@ -46,15 +46,38 @@ def __cmp__(self, other):
def __eq__(self, other): def __eq__(self, other):
return not self.__ne__(other) return not self.__ne__(other)
def __ne__(self, other): def __ne__(self, other):
if isinstance(other, TCPPacket): if isinstance(other, Packet):
return cmp(self, other) != 0 return cmp(self, other) != 0
else: else:
return True return True
def __repr__(self): def __repr__(self):
return 'TCPPacket(%s, %s, seq=%x , ack=%x, data="%s")' % ( return 'Packet(%s, %s, seq=%x , ack=%x, data="%s")' % (
friendly_socket(self.socket), friendly_socket(self.socket),
friendly_tcp_flags(self.tcp.flags), friendly_tcp_flags(self.tcp.flags),
self.tcp.seq, self.tcp.seq,
self.tcp.ack, self.tcp.ack,
friendly_data(self.tcp.data)[:60] friendly_data(self.tcp.data)[:60]
) )


class PadPacket(Packet):
    '''
    A synthetic stand-in for a TCP packet, used to pad missing data.

    Carries `size` NUL bytes starting at sequence number `seq`. Every
    attribute that would normally come from a captured frame is None.
    '''

    def __init__(self, seq, size, ts):
        '''
        Args:
        seq = sequence number at which the padding starts
        size = number of zero bytes of padding
        ts = timestamp to give the fake packet
        '''
        # nothing was captured, so there are no frame/header objects
        self.buf = self.eth = self.ip = self.tcp = self.socket = None
        self.ack = self.flags = self.rtt = None
        # the padding itself and the sequence range it occupies
        self.ts = ts
        self.data = '\0' * size
        self.seq = self.seq_start = seq
        self.seq_end = self.seq_start + size

    def __repr__(self):
        pad_size = len(self.data)
        return 'PadPacket(seq=%d, size=%d)' % (self.seq, pad_size)

0 comments on commit 2864de0

Please sign in to comment.