Permalink
Browse files

Merge branch 'master' of http://github.com/songlibo/pcap2har into libo-sll
  • Loading branch information...
  • Loading branch information...
2 parents 182bede + c583c06 commit f716cd37f9e1dabb82500d9cfc95e09563257c15 Andrew Fleenor committed Nov 16, 2010
Showing with 43 additions and 16 deletions.
  1. +1 −9 har.py
  2. +2 −3 http/response.py
  3. +26 −1 httpsession.py
  4. +14 −3 pcap.py
View
@@ -47,16 +47,8 @@ def HTTPResponseJsonRepr(self):
content = {
'size': len(self.body),
'compression': len(self.body) - len(self.raw_body),
+ 'mimeType': self.mimeType
}
- if self.mimeType != '':
- content['mimeType'] = self.mimeType
- else:
- # TBD: this case causes verification errors--change the spec? set a default?
- # this happens when there's no content-type header, which often
- # happens for an empty body (which often happens for 301 (redirect),
- # and sometimes even for 200)
- content['mimeType'] = self.mimeType
-
if self.text:
content['text'] = self.text.encode('utf8') # must transcode to utf8
return {
View
@@ -33,10 +33,9 @@ def __init__(self, tcpdir, pointer):
# get mime type
if 'content-type' in self.msg.headers:
self.mediaType = MediaType(self.msg.headers['content-type'])
- self.mimeType = self.mediaType.mimeType()
else:
- self.mediaType = None
- self.mimeType = ''
+ self.mediaType = MediaType('application/x-unknown-content-type')
+ self.mimeType = self.mediaType.mimeType()
# try to get out unicode
self.handle_text()
def handle_compression(self):
View
@@ -9,6 +9,11 @@
class Page:
'''
Represents a page entry in the HAR. Requests refer to one by its url.
+
+ Members:
+ title = string, the title of the page or the url
+ startedDateTime = datetime.datetime
+ url = the page url
'''
def __init__(self, url, title, startedDateTime):
self.title = title
@@ -109,10 +114,24 @@ def page2title(page):
return page
class PageTracker(object):
+ '''
+ Keeps track of the pages that show up in the pcap.
+
+ Members:
+ pages = {page_url(string): [pageref(string), start_time (datetime), title]}
+ '''
def __init__(self):
self.pages = dict() # {page: [ref_string, start_time, title]}
def getref(self, page, start_time):
+ '''
+ Either finds or creates the pageref for the page. Returns the pageref,
+ and adds the page to self.pages.
+
+ Arguments:
+ page = url
+ start_time = datetime
+ '''
if page not in self.pages:
idx = len(self.pages)
self.pages[page] = ['pageref_%d'%(idx), start_time, page2title(page)]
@@ -121,6 +140,12 @@ def getref(self, page, start_time):
self.pages[page][1] = start_time
return self.pages[page][0]
+ # hack until we feel like actually calculating these, if it's possible
+ default_page_timings = {
+ 'onContentLoad': -1,
+ 'onLoad': -1
+ }
+
def json_repr(self):
'''
return a JSON serializable python object representation of self.
@@ -130,7 +155,7 @@ def json_repr(self):
'startedDateTime': start_time.isoformat() + 'Z', # assume time is in UTC
'id': page_ref,
'title': title if title != '' else 'top',
- 'pageTimings': {}
+ 'pageTimings': PageTracker.default_page_timings
} for page_str, page_ref, start_time, title in srt]
class HTTPSession(object):
View
@@ -42,7 +42,13 @@ def __init__(self, pcap_reader):
self.errors.append((pkt, 'packet is too short', debug_pkt_count))
# parse packet
try:
- eth = dpkt.ethernet.Ethernet(pkt[1])
+ dltoff = dpkt.pcap.dltoff
+ if pcap_reader.dloff == dltoff[dpkt.pcap.DLT_LINUX_SLL]:
+ eth = dpkt.sll.SLL(pkt[1])
+ else:
+ # TODO(lsong): Check other packet type.
+ # Default is ethernet.
+ eth = dpkt.ethernet.Ethernet(pkt[1])
if isinstance(eth.data, dpkt.ip.IP):
ip = eth.data
if isinstance(ip.data, dpkt.tcp.TCP):
@@ -72,8 +78,13 @@ def process_packet(self, pkt):
srcip, srcport = src
dstip, dstport = dst
if (srcport == 5223 or dstport == 5223):
- # hpvirtgrp
- log.debug("hpvirtgrp packets are ignored.")
+ log.warning("hpvirtgrp packets are ignored.")
+ return
+ if (srcport == 5228 or dstport == 5228):
+ log.warning("hpvroom packets are ignored.")
+ return
+ if (srcport == 443 or dstport == 443):
+ log.warning("HTTPS packets are ignored.")
return
if (src, dst) in self.flowdict:

0 comments on commit f716cd3

Please sign in to comment.