Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge changes from Matt Welsh and Ethan Katz-Basset

  • Loading branch information...
commit 2864de0d60674884e99db71dd65deaffd4a92a48 2 parents 007254b + 7a6d661
Andrew Fleenor authored
19 dpkt_http_replacement.py
View
@@ -7,6 +7,8 @@
import cStringIO
import dpkt
+import logging
+import settings
def parse_headers(f):
"""Return dict of HTTP headers parsed from a file object."""
@@ -49,14 +51,23 @@ def parse_body(f, version, headers):
l.append(buf)
else:
break
- if not found_end:
+ if settings.strict_http_parse_body and not found_end:
raise dpkt.NeedData('premature end of chunked body')
body = ''.join(l)
elif 'content-length' in headers:
- n = int(headers['content-length'])
+ # Ethan K B: Have observed malformed 0,0 content lengths
+ try:
+ n = int(headers['content-length'])
+ except ValueError:
+ logging.warn('HTTP content-length "%s" is invalid, assuming 0' %
+ headers['content-length'])
+ n = 0
body = f.read(n)
if len(body) != n:
- raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body)))
+ logging.warn('HTTP content-length mismatch: expected %d, got %d', n,
+ len(body))
+ if settings.strict_http_parse_body:
+ raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body)))
else:
# XXX - need to handle HTTP/0.9
# BTW, this function is not called if status code is 204 or 304
@@ -167,7 +178,7 @@ def unpack(self, buf):
f = cStringIO.StringIO(buf)
line = f.readline()
l = line.strip().split(None, 2)
- if len(l) < 2 or not l[0].startswith(self.__proto) or not l[1].isdigit():
+ if len(l) < 3 or not l[0].startswith(self.__proto) or not l[1].isdigit():
raise dpkt.UnpackError('invalid response: %r' % line)
self.version = l[0][len(self.__proto)+1:]
self.status = l[1]
2  http/flow.py
View
@@ -54,7 +54,7 @@ def __init__(self, tcpflow):
except LookupError:
# there were no responses after the first request
# there's nothing we can do
- logging.warning("Request has no reponse.")
+ logging.warning("Request has no response.")
class MessagePair:
'''
3  main.py
View
@@ -27,11 +27,14 @@
dest='drop_bodies', default=False)
parser.add_option('-r', '--resource-usage', action='store_true',
dest='resource_usage', default=False)
+parser.add_option('--pad_missing_tcp_data', action='store_true',
+ dest='pad_missing_tcp_data', default=False)
options, args = parser.parse_args()
# copy options to settings module
settings.process_pages = options.pages
settings.drop_bodies = options.drop_bodies
+settings.pad_missing_tcp_data = options.pad_missing_tcp_data
# setup logs
logging.basicConfig(filename='pcap2har.log', level=logging.INFO)
8 mediatype.py
View
@@ -15,7 +15,7 @@ class MediaType(object):
# of string as the types above, values are pretty much anything but another
# semicolon
mediatype_re = re.compile(
- r'^([\w\-+.]+)/([\w\-+.]+)((?:\s*;\s*[\w\-]+=[^;]+)*)\s*$'
+ r'^([\w\-+.]+)/([\w\-+.]+)((?:\s*;\s*[\w\-]+=[^;]+)*);?\s*$'
)
# RE for parsing name-value pairs
nvpair_re = re.compile(r'^\s*([\w\-]+)=([^;\s]+)\s*$')
@@ -25,6 +25,12 @@ def __init__(self, data):
Args:
data = string, the media type string
'''
+ if not data:
+ log.warning('Setting empty media type to x-unknown-content-type')
+ self.type = 'application'
+ self.subtype = 'x-unknown-content-type'
+ params = {}
+ return
match = self.mediatype_re.match(data)
if match:
# get type/subtype
2  pcap.py
View
@@ -66,5 +66,5 @@ def ParsePcap(dispatcher, filename=None, reader=None):
except dpkt.dpkt.NeedData as error:
log.warning(error)
log.warning('A packet in the pcap file was too short, '
- 'debug_pkt_count=%d' % debug_pkt_count)
+ 'packet_count=%d' % packet_count)
errors.append((None, error))
19 pcaputil.py
View
@@ -3,7 +3,12 @@
'''
import dpkt
-from socket import inet_ntoa
+
+# use inet_ntoa to process IPs, if available (it's not on AppEngine)
+try:
+ from socket import inet_ntoa
+except ImportError:
+ inet_ntoa = lambda ip: ip
def friendly_tcp_flags(flags):
'''
@@ -61,8 +66,16 @@ class ModifiedReader(object):
'''
def __init__(self, fileobj):
- self.name = fileobj.name
- self.fd = fileobj.fileno()
+ if hasattr(fileobj, 'name'):
+ self.name = fileobj.name
+ else:
+ self.name = '<unknown>'
+
+ if hasattr(fileobj, 'fileno'):
+ self.fd = fileobj.fileno()
+ else:
+ self.fd = None
+
self.__f = fileobj
buf = self.__f.read(dpkt.pcap.FileHdr.__hdr_len__)
self.__fh = dpkt.pcap.FileHdr(buf)
7 settings.py
View
@@ -1,2 +1,9 @@
process_pages = True
drop_bodies = False # bodies of http responses
+
+# Whether HTTP parsing should care whether the body length matches the
+# content-length header.
+strict_http_parse_body = True
+
+# Whether to pad missing data in TCP flows with 0 bytes
+pad_missing_tcp_data = True
BIN  tcp/.packet.py.swp
View
Binary file not shown
23 tcp/direction.py
View
@@ -2,6 +2,8 @@
import chunk as tcp
from operator import itemgetter, attrgetter
import logging as log
+import settings
+import packet
class Direction:
'''
@@ -33,6 +35,7 @@ def __init__(self, flow):
self.final_arrival_pointer = None
self.chunks = SortedCollection(key=attrgetter('seq_start'))
self.final_data_chunk = None
+
def add(self, pkt):
'''
Merge the packet into the first chunk it overlaps with. If data was
@@ -137,6 +140,8 @@ def finish(self):
that self.data can be decided upon. Also calculates final_arrival for
any packets that arrived while seq_start was None
'''
+ if settings.pad_missing_tcp_data:
+ self.pad_missing_data()
self.finished = True
# calculate final_arrival
if not self.final_arrival_data:
@@ -145,8 +150,10 @@ def finish(self):
if vertex[1].ts > peak_time:
peak_time = vertex[1].ts
self.final_arrival_data.insert((vertex[0], vertex[1].ts))
+
if self.chunks and not self.final_data_chunk:
self.final_data_chunk = self.chunks[0]
+
def new_chunk(self, pkt):
'''
creates a new tcp.Chunk for the pkt to live in. Only called if an
@@ -195,3 +202,19 @@ def seq_final_arrival(self, seq_num):
return self.final_arrival_data.find_le(seq_num)[1]
except:
return None
+
+ def pad_missing_data(self):
+ '''Pad missing data in the flow with zero bytes.'''
+ if not self.chunks:
+ return
+ prev_chunk = self.chunks[0]
+ for chunk in self.chunks[1:]:
+ gap = chunk.seq_start - prev_chunk.seq_end
+ if gap > 0:
+ log.info('Padding %d missing bytes at %d',
+ gap, prev_chunk.seq_end)
+ first_chunk_pkt = self.seq_arrival(chunk.seq_start)
+ chunk_ts = first_chunk_pkt.ts
+ pad_pkt = packet.PadPacket(prev_chunk.seq_end, gap, chunk_ts)
+ self.add(pad_pkt)
+ prev_chunk = chunk
1  tcp/flow.py
View
@@ -91,6 +91,7 @@ def finish(self):
self.flush_packets()
self.fwd.finish()
self.rev.finish()
+
def samedir(self, pkt):
'''
returns whether the passed packet is in the same direction as the
29 tcp/packet.py
View
@@ -5,7 +5,7 @@ class Packet(object):
'''
Represents a TCP packet. Copied from pyper, with additions. contains
socket, timestamp, and data
-
+
Members:
ts = dpkt timestamp
buf = original data from which eth was constructed
@@ -46,15 +46,38 @@ def __cmp__(self, other):
def __eq__(self, other):
return not self.__ne__(other)
def __ne__(self, other):
- if isinstance(other, TCPPacket):
+ if isinstance(other, Packet):
return cmp(self, other) != 0
else:
return True
def __repr__(self):
- return 'TCPPacket(%s, %s, seq=%x , ack=%x, data="%s")' % (
+ return 'Packet(%s, %s, seq=%x , ack=%x, data="%s")' % (
friendly_socket(self.socket),
friendly_tcp_flags(self.tcp.flags),
self.tcp.seq,
self.tcp.ack,
friendly_data(self.tcp.data)[:60]
)
+
+
+class PadPacket(Packet):
+ '''
+ Represents a fake TCP packet used for padding missing data.
+ '''
+ def __init__(self, seq, size, ts):
+ self.ts = ts
+ self.buf = None
+ self.eth = None
+ self.ip = None
+ self.tcp = None
+ self.socket = None
+ self.data = '\0' * size
+ self.seq = seq
+ self.ack = None
+ self.flags = None
+ self.seq_start = seq
+ self.seq_end = self.seq_start + size
+ self.rtt = None
+
+ def __repr__(self):
+ return 'PadPacket(seq=%d, size=%d)' % (self.seq, len(self.data))
Please sign in to comment.
Something went wrong with that request. Please try again.