Skip to content

Commit

Permalink
Economize on memory in http, and related changes.
Browse files Browse the repository at this point in the history
Refactor dpkt_http_replacement to use only two StringIOs. Move
print_rusage to pcaputil.
  • Loading branch information
Andrew Fleenor committed Aug 21, 2012
1 parent 624f478 commit 4120b4f
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 21 deletions.
8 changes: 2 additions & 6 deletions main.py
Expand Up @@ -17,6 +17,8 @@
from pcap2har import tcp
from pcap2har import settings
from pcap2har.packetdispatcher import PacketDispatcher
from pcap2har.pcaputil import print_rusage


# get cmdline args/options
parser = optparse.OptionParser(
Expand Down Expand Up @@ -64,12 +66,6 @@

logging.info('Flows=%d. HTTP pairs=%d' % (len(session.flows), len(session.entries)))

def print_rusage():
    """
    Print this process's peak resident set size, in KiB, to stdout.

    The output line has the form 'max_rss: <n> KiB'.
    """
    rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    if sys.platform == 'darwin':
        # Mac OSX returns rss in bytes, not KiB. Floor division keeps the
        # value an integer even where '/' means true division.
        rss //= 1024
    # A single parenthesised argument prints identically whether print is
    # a statement (Python 2) or a function (Python 3).
    print('max_rss: %s KiB' % rss)

#write the HAR file
with open(outputfile, 'w') as f:
json.dump(session, f, cls=har.JsonReprEncoder, indent=2, encoding='utf8', sort_keys=True)
Expand Down
32 changes: 21 additions & 11 deletions pcap2har/dpkt_http_replacement.py
Expand Up @@ -98,6 +98,24 @@ def parse_body(f, version, headers):
body = ''
return body

def parse_message(message, f):
    """
    Fill in an HTTP message from a file-like object.

    Reads the headers, then the body (unless the status says there is
    none), and stores whatever remains in the stream.

    Args:
      message: Request or Response to populate; its `headers`, `body` and
        `data` attributes are set here.
      f: file-like object, probably StringIO, to read from.
    """
    message.headers = parse_headers(f)
    # Messages without a `status` attribute get None here, so they always
    # take the body-parsing branch.
    status = getattr(message, 'status', None)
    if status in ('204', '304'):
        # These statuses are known to carry no body; skip parsing it.
        message.body = ''
    else:
        message.body = parse_body(f, message.version, message.headers)
    # Keep the unread remainder of the stream.
    message.data = f.read()

class Message(dpkt.Packet):
"""Hypertext Transfer Protocol headers + body."""
__metaclass__ = type
Expand All @@ -118,15 +136,7 @@ def __init__(self, *args, **kwargs):

def unpack(self, buf):
    """
    Parse an HTTP message's headers and body out of buf.

    Args:
      buf: string with the raw message data to parse.

    The parsing is done by parse_message, which sets self.headers,
    self.body and self.data.
    """
    f = cStringIO.StringIO(buf)
    # Parse exactly once through the shared helper. Parsing inline first
    # and then calling parse_message would run the second pass on a stream
    # that the first pass had already drained with f.read().
    parse_message(self, f)

def pack_hdr(self):
    """Return the headers serialized as 'name: value' lines ending in CRLF."""
    lines = []
    for item in self.headers.iteritems():
        # Each item is a (name, value) pair that fills both %s slots.
        lines.append('%s: %s\r\n' % item)
    return ''.join(lines)
Expand Down Expand Up @@ -169,7 +179,7 @@ def unpack(self, buf):
self.method = l[0]
self.uri = l[1]
self.version = l[2][len(self.__proto)+1:]
Message.unpack(self, f.read())
parse_message(self, f)

def __str__(self):
return '%s %s %s/%s\r\n' % (self.method, self.uri, self.__proto,
Expand All @@ -193,7 +203,7 @@ def unpack(self, buf):
self.version = l[0][len(self.__proto)+1:]
self.status = l[1]
self.reason = l[2]
Message.unpack(self, f.read())
parse_message(self, f)

def __str__(self):
return '%s/%s %s %s\r\n' % (self.__proto, self.version, self.status,
Expand Down
2 changes: 1 addition & 1 deletion pcap2har/http/flow.py
Expand Up @@ -105,7 +105,7 @@ def gather_messages(MessageClass, tcpdir):
pointer = 0 # starting index of data that MessageClass should look at
# while there's data left
while pointer < len(tcpdir.data):
curr_data = tcpdir.data[pointer:pointer+200] # debug var
#curr_data = tcpdir.data[pointer:pointer+200] # debug var
try:
msg = MessageClass(tcpdir, pointer)
except dpkt.Error as error: # if the message failed
Expand Down
5 changes: 3 additions & 2 deletions pcap2har/http/message.py
Expand Up @@ -23,8 +23,9 @@ def __init__(self, tcpdir, pointer, msgclass):
self.tcpdir = tcpdir
# attempt to parse as http. let exception fall out to caller
self.msg = msgclass(tcpdir.data[pointer:])
self.data = self.msg.data
self.data_consumed = (len(tcpdir.data) - pointer) - len(self.data)
self.data_consumed = (len(tcpdir.data) - pointer) - len(self.msg.data)
# save memory by deleting data attribute; it's useless
self.msg.data = None
# calculate sequence numbers of data
self.seq_start = tcpdir.byte_to_seq(pointer)
self.seq_end = tcpdir.byte_to_seq(pointer + self.data_consumed) # past-the-end
Expand Down
9 changes: 8 additions & 1 deletion pcap2har/pcaputil.py
Expand Up @@ -3,7 +3,8 @@
'''

import dpkt

import resource
import sys

# Re-implemented here only because it's missing on AppEngine.
def inet_ntoa(packed):
Expand Down Expand Up @@ -161,3 +162,9 @@ class FakeFlow(object):
def __init__(self, fwd, rev):
self.fwd = fwd
self.rev = rev

def print_rusage():
    """
    Print this process's peak resident set size, in KiB, to stdout.

    The output line has the form 'max_rss: <n> KiB'.
    """
    rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    if sys.platform == 'darwin':
        # Mac OSX returns rss in bytes, not KiB. Floor division keeps the
        # value an integer even where '/' means true division.
        rss //= 1024
    # A single parenthesised argument prints identically whether print is
    # a statement (Python 2) or a function (Python 3).
    print('max_rss: %s KiB' % rss)

0 comments on commit 4120b4f

Please sign in to comment.