Permalink
Browse files

Fixes from Likai to be more forgiving with missing times and dates

  • Loading branch information...
1 parent 219d002 commit b3f6e6716b3de1969c988330063dcc57c30d2486 Andrew Fleenor committed Mar 5, 2013
Showing with 39 additions and 16 deletions.
  1. +1 −1 main.py
  2. +4 −0 pcap2har/http/message.py
  3. +14 −12 pcap2har/httpsession.py
  4. +4 −2 pcap2har/pagetracker.py
  5. +16 −1 pcap2har/pcaputil.py
View
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
'''
Main program that converts pcaps to HAR's.
@@ -1,3 +1,5 @@
+import logging
+
class Message(object):
'''
Contains a dpkt.http.Request/Response, as well as other data required to
@@ -32,6 +34,8 @@ def __init__(self, tcpdir, pointer, msgclass):
# calculate arrival_times
self.ts_start = tcpdir.seq_final_arrival(self.seq_start)
self.ts_end = tcpdir.seq_final_arrival(self.seq_end - 1)
+ if self.ts_start is None or self.ts_end is None:
+ logging.warn('Got an HTTP message with unknown start or end time.')
# get raw body
self.raw_body = self.msg.body
self.__pointer = pointer
@@ -7,7 +7,7 @@
import dpkt
import logging
-from pcaputil import ms_from_timedelta, ms_from_dpkt_time
+from pcaputil import ms_from_dpkt_time, ms_from_dpkt_time_diff
from pagetracker import PageTracker
import http
import settings
@@ -35,24 +35,25 @@ def __init__(self, request, response):
self.request = request
self.response = response
self.pageref = None
- self.ts_start = int(request.ts_connect*1000)
- self.startedDateTime = datetime.utcfromtimestamp(request.ts_connect)
+ self.ts_start = ms_from_dpkt_time(request.ts_connect)
+ if request.ts_connect is None:
+ self.startedDateTime = None
+ else:
+ self.startedDateTime = datetime.utcfromtimestamp(request.ts_connect)
# calculate other timings
self.time_blocked = -1
self.time_dnsing = -1
self.time_connecting = (
- ms_from_dpkt_time(request.ts_start - request.ts_connect))
+ ms_from_dpkt_time_diff(request.ts_start, request.ts_connect))
self.time_sending = (
- ms_from_dpkt_time(request.ts_end - request.ts_start))
+ ms_from_dpkt_time_diff(request.ts_end, request.ts_start))
if response is not None:
self.time_waiting = (
- ms_from_dpkt_time(response.ts_start - request.ts_end))
+ ms_from_dpkt_time_diff(response.ts_start, request.ts_end))
self.time_receiving = (
- ms_from_dpkt_time(response.ts_end - response.ts_start))
+ ms_from_dpkt_time_diff(response.ts_end, response.ts_start))
endedDateTime = datetime.utcfromtimestamp(response.ts_end)
- self.total_time = ms_from_timedelta(
- endedDateTime - self.startedDateTime
- )
+ self.total_time = ms_from_dpkt_time_diff(response.ts_end, request.ts_connect)
else:
# this can happen if the request never gets a response
self.time_waiting = -1
@@ -64,8 +65,6 @@ def json_repr(self):
return a JSON serializable python object representation of self.
'''
d = {
- # Z means time is in UTC
- 'startedDateTime': self.startedDateTime.isoformat() + 'Z',
'time': self.total_time,
'request': self.request,
'response': self.response,
@@ -79,6 +78,9 @@ def json_repr(self):
},
'cache': {},
}
+ if self.startedDateTime:
+ # Z means time is in UTC
+ d['startedDateTime'] = self.startedDateTime.isoformat() + 'Z'
if self.pageref:
d['pageref'] = self.pageref
return d
@@ -49,12 +49,14 @@ def add(self, entry):
self.referrers.add(entry.request.url)
def json_repr(self):
- return {
+ d = {
'id': self.pageref,
- 'startedDateTime': self.startedDateTime.isoformat() + 'Z',
'title': self.title,
'pageTimings': default_page_timings
}
+ if self.startedDateTime:
+ d['startedDateTime'] = self.startedDateTime.isoformat() + 'Z'
+ return d
default_page_timings = {
View
@@ -69,11 +69,26 @@ def ms_from_timedelta(td):
def ms_from_dpkt_time(td):
'''
Get milliseconds from a dpkt timestamp. This should probably only really be
- done on a number gotten from subtracting two dpkt timestamps.
+ done on a number gotten from subtracting two dpkt timestamps. td could be
+ None if the packet the timestamp should have been gotten
+ from was missing, in which case -1 is returned.
'''
+ if td is None:
+ return -1
return int(td * 1000)
+def ms_from_dpkt_time_diff(td1, td2):
+ '''
+ Get milliseconds from the difference of two dpkt timestamps. Either
+ timestamp could be None if packets are missing, in which case -1 is
+ returned.
+ '''
+ if td1 is None or td2 is None:
+ return -1
+ return ms_from_dpkt_time(td1 - td2)
+
+
class ModifiedReader(object):
'''
A copy of the dpkt pcap Reader. The only change is that the iterator

0 comments on commit b3f6e67

Please sign in to comment.