Permalink
Browse files

Economize on memory in http, and related changes.

Refactor dpkt_http_replacement to use only two StringIO objects. Move
print_rusage to pcaputil.
  • Loading branch information...
1 parent 624f478 commit 4120b4f77c11efd4bdef2a1d49f0b58c9efd5037 Andrew Fleenor committed Aug 21, 2012
Showing with 35 additions and 21 deletions.
  1. +2 −6 main.py
  2. +21 −11 pcap2har/dpkt_http_replacement.py
  3. +1 −1 pcap2har/http/flow.py
  4. +3 −2 pcap2har/http/message.py
  5. +8 −1 pcap2har/pcaputil.py
View
@@ -17,6 +17,8 @@
from pcap2har import tcp
from pcap2har import settings
from pcap2har.packetdispatcher import PacketDispatcher
+from pcap2har.pcaputil import print_rusage
+
# get cmdline args/options
parser = optparse.OptionParser(
@@ -64,12 +66,6 @@
logging.info('Flows=%d. HTTP pairs=%d' % (len(session.flows), len(session.entries)))
-def print_rusage():
- rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
- if sys.platform == 'darwin':
- rss /= 1024 # Mac OSX returns rss in bytes, not KiB
- print 'max_rss:', rss, 'KiB'
-
#write the HAR file
with open(outputfile, 'w') as f:
json.dump(session, f, cls=har.JsonReprEncoder, indent=2, encoding='utf8', sort_keys=True)
@@ -98,6 +98,24 @@ def parse_body(f, version, headers):
body = ''
return body
+def parse_message(message, f):
+ """
+ Unpack headers and optionally body from the passed file-like object.
+
+ Args:
+ message: Request or Response to which to add data.
+ f: file-like object, probably StringIO.
+ """
+ # Parse headers
+ message.headers = parse_headers(f)
+ # Parse body, unless we know there isn't one
+ if not (getattr(message, 'status', None) in ('204', '304')):
+ message.body = parse_body(f, message.version, message.headers)
+ else:
+ message.body = ''
+ # Save the rest
+ message.data = f.read()
+
class Message(dpkt.Packet):
"""Hypertext Transfer Protocol headers + body."""
__metaclass__ = type
@@ -118,15 +136,7 @@ def __init__(self, *args, **kwargs):
def unpack(self, buf):
f = cStringIO.StringIO(buf)
- # Parse headers
- self.headers = parse_headers(f)
- # Parse body, unless we know there isn't one
- if not (getattr(self, 'status', None) in ('204', '304')):
- self.body = parse_body(f, self.version, self.headers)
- else:
- self.body = ''
- # Save the rest
- self.data = f.read()
+ parse_message(self, f)
def pack_hdr(self):
return ''.join([ '%s: %s\r\n' % t for t in self.headers.iteritems() ])
@@ -169,7 +179,7 @@ def unpack(self, buf):
self.method = l[0]
self.uri = l[1]
self.version = l[2][len(self.__proto)+1:]
- Message.unpack(self, f.read())
+ parse_message(self, f)
def __str__(self):
return '%s %s %s/%s\r\n' % (self.method, self.uri, self.__proto,
@@ -193,7 +203,7 @@ def unpack(self, buf):
self.version = l[0][len(self.__proto)+1:]
self.status = l[1]
self.reason = l[2]
- Message.unpack(self, f.read())
+ parse_message(self, f)
def __str__(self):
return '%s/%s %s %s\r\n' % (self.__proto, self.version, self.status,
@@ -105,7 +105,7 @@ def gather_messages(MessageClass, tcpdir):
pointer = 0 # starting index of data that MessageClass should look at
# while there's data left
while pointer < len(tcpdir.data):
- curr_data = tcpdir.data[pointer:pointer+200] # debug var
+ #curr_data = tcpdir.data[pointer:pointer+200] # debug var
try:
msg = MessageClass(tcpdir, pointer)
except dpkt.Error as error: # if the message failed
@@ -23,8 +23,9 @@ def __init__(self, tcpdir, pointer, msgclass):
self.tcpdir = tcpdir
# attempt to parse as http. let exception fall out to caller
self.msg = msgclass(tcpdir.data[pointer:])
- self.data = self.msg.data
- self.data_consumed = (len(tcpdir.data) - pointer) - len(self.data)
+ self.data_consumed = (len(tcpdir.data) - pointer) - len(self.msg.data)
+ # save memory by deleting data attribute; it's useless
+ self.msg.data = None
# calculate sequence numbers of data
self.seq_start = tcpdir.byte_to_seq(pointer)
self.seq_end = tcpdir.byte_to_seq(pointer + self.data_consumed) # past-the-end
@@ -3,7 +3,8 @@
'''
import dpkt
-
+import resource
+import sys
# Re-implemented here only because it's missing on AppEngine.
def inet_ntoa(packed):
@@ -161,3 +162,9 @@ class FakeFlow(object):
def __init__(self, fwd, rev):
self.fwd = fwd
self.rev = rev
+
+def print_rusage():
+ rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+ if sys.platform == 'darwin':
+ rss /= 1024 # Mac OSX returns rss in bytes, not KiB
+ print 'max_rss:', rss, 'KiB'

0 comments on commit 4120b4f

Please sign in to comment.