Permalink
Browse files

Bugfixes on some of the lower-level libraries.

Implemented most (if not all) of the new serializer. It is not yet as efficient as it eventually will be, because the compressor has not yet been modified to handle implicit indices and the absence of 'rem' instructions.
Modified the harfile library to use a real JSON parser instead of eval() (much safer, in case a HAR file is malicious).
  • Loading branch information...
1 parent a1297fa commit d839d76894d1ea5b2653ab0b5696507f7664c79a Roberto Peon committed Nov 16, 2012
@@ -259,6 +259,7 @@ class BitBucket {
}
bool GetBit() {
+ if (idx_byte >= bsa.size()) return 0;
bool bit = bsa[idx_byte] & (0x80 >> idx_boff);
++idx_boff;
if (idx_boff >= 8) {
@@ -2,7 +2,9 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from common_utils import FormatAsBits
+from common_utils import StrToList
import sys
+import struct
class BitBucket:
def __init__(self):
@@ -18,6 +20,18 @@ def Clear(self):
def StoreBit(self, bit):
self.StoreBits( ([bit << 7], 1) )
+ def StoreBits8(self, val):
+ tmp_val = struct.pack(">B", val)
+ self.StoreBits( (StrToList(tmp_val), 8))
+
+ def StoreBits16(self, val):
+ tmp_val = struct.pack(">H", val)
+ self.StoreBits( (StrToList(tmp_val), 16))
+
+ def StoreBits32(self, val):
+ tmp_val = struct.pack(">L", val)
+ self.StoreBits( (StrToList(tmp_val), 32))
+
def StoreBits(self, input):
(inp_bytes, inp_bits) = input
old_out_boff = self.out_boff
@@ -29,9 +43,12 @@ def StoreBits(self, input):
leftover_bits = 8
if self.out_boff == 0:
self.output.extend(inp_bytes)
- if leftover_bits:
- self.output[-1] &= ~(255 >> leftover_bits)
- self.out_boff = leftover_bits % 8
+ if not type(inp_bytes[0]) == int:
+ print "type(inp_bytes[0]) == ", type(inp_bytes[0])
+ print repr(input)
+ raise StandardError()
+ self.output[-1] &= ~(255 >> leftover_bits)
+ self.out_boff = leftover_bits % 8
else:
# We know there is a non-zero bit offset if we're below here.
# This also implies there MUST be a byte in output already.
@@ -63,16 +80,36 @@ def NumBits(self):
print "WTF"
return num_bits
+ def BytesOfStorage(self):
+ return (self.NumBits() + 7) / 8
+
def BitsRemaining(self):
return self.NumBits() - (8*self.idx_byte + self.idx_boff)
def AllConsumed(self):
return self.NumBits() <= (8*self.idx_byte + self.idx_boff)
+ def GetBits8(self):
+ raw_data = self.GetBits(8)[0]
+ arg = "%c%c%c%c" % (0,0, 0, raw_data[0])
+ return struct.unpack(">L", arg)[0]
+
+ def GetBits16(self):
+ raw_data = self.GetBits(16)[0]
+ arg = "%c%c%c%c" % (0,0, raw_data[0], raw_data[1])
+ return struct.unpack(">L", arg)[0]
+
+ def GetBits32(self):
+ raw_data = self.GetBits(32)[0]
+ arg = "%c%c%c%c" % (raw_data[0], raw_data[1], raw_data[2], raw_data[3])
+ return struct.unpack(">L", arg)[0]
+
def GetBits(self, num_bits):
old_idx_boff = self.idx_boff
- if num_bits > self.NumBits() - (8*self.idx_byte + self.idx_boff):
+ bits_available = self.NumBits() - (8*self.idx_byte + self.idx_boff)
+ if num_bits > bits_available:
+ print "num_bits: %d but bits_available: %d" % (num_bits, bits_available)
raise StandardError()
retval = []
bits_left = num_bits
@@ -131,10 +168,5 @@ def DebugFormat(self):
sys.stdout.write("-")
print "^"
- def __str__(self):
- return FormatAsBits((self.output, self.out_boff))
-
def __repr__(self):
- return self.__str__()
-
-
+ return FormatAsBits((self.output, self.out_boff))
@@ -5,6 +5,8 @@
# found in the LICENSE file.
import re
+import json
+import sys
def MakeDefaultHeaders(list_o_dicts, items_to_ignore=[]):
retval = {}
@@ -21,13 +23,27 @@ def MakeDefaultHeaders(list_o_dicts, items_to_ignore=[]):
retval[key] = val
return retval
+def EncodeStringsAsUTF8(x):
+ retval = {}
+ for k,v in x.iteritems():
+ n_k = k
+ if isinstance(k, unicode):
+ n_k = k.encode("utf8")
+ n_v = v
+ if isinstance(v, unicode):
+ n_v = v.encode("utf8")
+ retval[n_k] = n_v
+ return retval
+
def ReadHarFile(filename):
f = open(filename)
- null = None
- true = 1
- false = 0
- s = f.read()
- o = eval(s)
+ try:
+ o = json.loads(f.read(), object_hook=EncodeStringsAsUTF8)
+ # The object_hook encodes the unicode keys and string values of each JSON object as UTF-8.
+ except Exception as x:
+ print x
+ sys.exit("unable to parse: " + filename)
+
request_headers = []
response_headers = []
for entry in o["log"]["entries"]:
@@ -3,50 +3,92 @@
# found in the LICENSE file.
request_freq_table = [
- ('e', 3447), ('/', 3366), ('a', 3281), ('s', 3133), ('2', 3072), ('t', 2752),
- ('1', 2743), ('n', 2526), ('i', 2488), ('0', 2419), ('3', 2358), ('c', 2353),
- ('o', 2349), ('.', 2212), ('p', 2016), ('r', 2003), ('m', 1980), ('g', 1917),
- ('4', 1845), ('6', 1796), ('=', 1692), ('8', 1585), ('l', 1571), ('d', 1564),
- ('5', 1528), ('9', 1453), ('7', 1448), ('_', 1442), ('&', 1293), ('%', 1273),
- ('-', 1222), ('b', 1184), ('\x80', 1029), ('h', 988), ('u', 974), (',', 875),
- ('f', 802), ('j', 792), ('A', 771), ('w', 764), ('v', 763), ('F', 709),
- ('D', 651), ('y', 586), ('x', 564), ('k', 529), ('I', 489), ('G', 462),
- ('C', 399), ('S', 396), ('z', 358), ('V', 355), ('B', 354), ('U', 351),
- ('T', 339), ('L', 329), ('R', 315), ('E', 314), ('P', 313), ('q', 312),
- ('N', 306), ('M', 269), ('Z', 257), ('Y', 251), ('X', 250), ('H', 246),
- ('Q', 244), ('W', 243), ('?', 222), ('J', 212), ('O', 210), ('K', 207),
- (':', 147), (';', 122), (' ', 28), ('!', 27), ('(', 23), (')', 23), ('*', 18),
- ('+', 15), ('{', 11), ('}', 11), ('~', 4), ('$', 2), ("'", 2), ('[', 2),
- (']', 2), ('\x00', 0), ('\x01', 0), ('\x02', 0), ('\x03', 0), ('\x04', 0),
- ('\x05', 0), ('\x06', 0), ('\x07', 0), ('\x08', 0), ('\t', 0), ('\n', 0),
- ('\x0b', 0), ('\x0c', 0), ('\r', 0), ('\x0e', 0), ('\x0f', 0), ('\x10', 0),
- ('\x11', 0), ('\x12', 0), ('\x13', 0), ('\x14', 0), ('\x15', 0), ('\x16', 0),
- ('\x17', 0), ('\x18', 0), ('\x19', 0), ('\x1a', 0), ('\x1b', 0), ('\x1c', 0),
- ('\x1d', 0), ('\x1e', 0), ('\x1f', 0), ('"', 0), ('#', 0), ('<', 0), ('>', 0),
- ('@', 0), ('\\', 0), ('^', 0), ('`', 0), ('|', 0), ('\x7f', 0)
+ ('\x00', 0),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0),
+ ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0),
+ ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0),
+ ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0),
+ ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0),
+ ('\x1e', 0),('\x1f', 0),(' ', 61),('!', 9),('"', 0),('#', 0),('$', 2),
+ ('%', 1433),('&', 1662),("'", 2),('(', 34),(')', 34),('*', 25),('+', 4),
+ (',', 967),('-', 1379),('.', 2886),('/', 4511),('0', 3198),('1', 3331),
+ ('2', 3597),('3', 2691),('4', 2251),('5', 1880),('6', 2155),('7', 1639),
+ ('8', 1916),('9', 1728),(':', 171),(';', 214),('<', 0),('=', 2120),('>', 0),
+ ('?', 251),('@', 0),('A', 931),('B', 481),('C', 566),('D', 696),('E', 362),
+ ('F', 545),('G', 513),('H', 328),('I', 524),('J', 210),('K', 260),('L', 373),
+ ('M', 287),('N', 311),('O', 288),('P', 381),('Q', 291),('R', 328),('S', 543),
+ ('T', 434),('U', 386),('V', 372),('W', 295),('X', 216),('Y', 205),('Z', 199),
+ ('[', 2),('\\', 0),(']', 2),('^', 0),('_', 1702),('`', 0),('a', 4237),
+ ('b', 1601),('c', 3203),('d', 2392),('e', 4941),('f', 932),('g', 2297),
+ ('h', 1352),('i', 3233),('j', 913),('k', 630),('l', 2082),('m', 2429),
+ ('n', 3116),('o', 3286),('p', 2510),('q', 314),('r', 2646),('s', 3825),
+ ('t', 3486),('u', 1298),('v', 839),('w', 1172),('x', 760),('y', 705),
+ ('z', 352),('{', 12),('|', 12),('}', 12),('~', 4),('\x7f', 0),('\x80', 0),
+ ('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),('\x86', 0),
+ ('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),('\x8c', 0),
+ ('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),('\x92', 0),
+ ('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),('\x98', 0),
+ ('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),('\x9e', 0),
+ ('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),('\xa4', 0),
+ ('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),('\xaa', 0),
+ ('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),('\xb0', 0),
+ ('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),('\xb6', 0),
+ ('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),('\xbc', 0),
+ ('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),('\xc2', 0),
+ ('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),('\xc8', 0),
+ ('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),('\xce', 0),
+ ('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),('\xd4', 0),
+ ('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),('\xda', 0),
+ ('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),('\xe0', 0),
+ ('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),('\xe6', 0),
+ ('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),('\xec', 0),
+ ('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),('\xf2', 0),
+ ('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),('\xf8', 0),
+ ('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),('\xfe', 0),
+ ('\xff', 0),(256, 1304),
]
response_freq_table = [
- (' ', 6174), ('2', 5697), ('1', 5419), ('0', 5023), ('\x80', 3416),
- ('3', 3138), ('4', 2973), ('e', 2931), ('5', 2624), ('a', 2619), ('8', 2577),
- ('6', 2440), ('9', 2371), ('7', 2361), (':', 2303), ('c', 2209), ('u', 2005),
- ('d', 2001), ('T', 1991), ('M', 1849), ('n', 1770), ('o', 1751), ('b', 1608),
- ('t', 1564), ('G', 1515), ('i', 1458), (',', 1437), ('r', 1405), ('A', 1387),
- ('g', 1352), ('f', 1343), ('l', 1189), ('=', 1147), ('F', 1136), ('p', 1109),
- ('s', 1087), ('m', 1050), ('C', 998), ('/', 975), ('-', 928), ('D', 919),
- ('E', 835), ('h', 834), ('x', 825), ('S', 815), ('J', 781), ('B', 738),
- ('w', 669), ('.', 648), ('v', 641), ('O', 633), ('W', 622), ('y', 617),
- ('"', 580), ('P', 561), ('N', 557), ('U', 554), ('k', 549), ('I', 543),
- ('j', 526), ('R', 508), ('L', 492), ('Z', 488), ('V', 482), ('K', 478),
- ('z', 478), ('Q', 476), ('Y', 476), ('q', 463), ('X', 454), ('H', 446),
- (';', 278), ('+', 276), ('_', 270), ('(', 156), (')', 156), ('&', 136),
- ('%', 123), ('\x00', 53), ('?', 16), ('|', 11), ('#', 9), ('{', 4), ('}', 4),
- ('*', 2), ('[', 2), (']', 2), ('!', 1), ('\x01', 0), ('\x02', 0),
- ('\x03', 0), ('\x04', 0), ('\x05', 0), ('\x06', 0), ('\x07', 0), ('\x08', 0),
- ('\t', 0), ('\n', 0), ('\x0b', 0), ('\x0c', 0), ('\r', 0), ('\x0e', 0),
- ('\x0f', 0), ('\x10', 0), ('\x11', 0), ('\x12', 0), ('\x13', 0), ('\x14', 0),
- ('\x15', 0), ('\x16', 0), ('\x17', 0), ('\x18', 0), ('\x19', 0), ('\x1a', 0),
- ('\x1b', 0), ('\x1c', 0), ('\x1d', 0), ('\x1e', 0), ('\x1f', 0), ('$', 0),
- ("'", 0), ('<', 0), ('>', 0), ('@', 0), ('\\', 0), ('^', 0), ('`', 0),
- ('~', 0), ('\x7f', 0)
+ ('\x00', 87),('\x01', 0),('\x02', 0),('\x03', 0),('\x04', 0),('\x05', 0),
+ ('\x06', 0),('\x07', 0),('\x08', 0),('\t', 0),('\n', 0),('\x0b', 0),
+ ('\x0c', 0),('\r', 0),('\x0e', 0),('\x0f', 0),('\x10', 0),('\x11', 0),
+ ('\x12', 0),('\x13', 0),('\x14', 0),('\x15', 0),('\x16', 0),('\x17', 0),
+ ('\x18', 0),('\x19', 0),('\x1a', 0),('\x1b', 0),('\x1c', 0),('\x1d', 0),
+ ('\x1e', 0),('\x1f', 0),(' ', 8277),('!', 0),('"', 948),('#', 9),('$', 0),
+ ('%', 191),('&', 203),("'", 2),('(', 191),(')', 191),('*', 6),('+', 300),
+ (',', 2522),('-', 2374),('.', 1325),('/', 3266),('0', 7630),('1', 7965),
+ ('2', 7636),('3', 4415),('4', 4337),('5', 3594),('6', 3253),('7', 3223),
+ ('8', 3920),('9', 3306),(':', 3545),(';', 421),('<', 0),('=', 1626),('>', 0),
+ ('?', 24),('@', 0),('A', 1644),('B', 820),('C', 1187),('D', 1116),('E', 954),
+ ('F', 1260),('G', 1955),('H', 493),('I', 674),('J', 875),('K', 560),
+ ('L', 544),('M', 2305),('N', 844),('O', 781),('P', 640),('Q', 537),('R', 555),
+ ('S', 965),('T', 2550),('U', 691),('V', 504),('W', 776),('X', 459),('Y', 507),
+ ('Z', 476),('[', 11),('\\', 0),(']', 11),('^', 0),('_', 436),('`', 0),
+ ('a', 5171),('b', 3355),('c', 4201),('d', 3265),('e', 5511),('f', 2185),
+ ('g', 2455),('h', 1166),('i', 3075),('j', 768),('k', 768),('l', 1980),
+ ('m', 1582),('n', 3613),('o', 3418),('p', 1864),('q', 532),('r', 2488),
+ ('s', 2906),('t', 3324),('u', 2433),('v', 1097),('w', 927),('x', 1169),
+ ('y', 749),('z', 506),('{', 9),('|', 13),('}', 9),('~', 0),('\x7f', 0),
+ ('\x80', 0),('\x81', 0),('\x82', 0),('\x83', 0),('\x84', 0),('\x85', 0),
+ ('\x86', 0),('\x87', 0),('\x88', 0),('\x89', 0),('\x8a', 0),('\x8b', 0),
+ ('\x8c', 0),('\x8d', 0),('\x8e', 0),('\x8f', 0),('\x90', 0),('\x91', 0),
+ ('\x92', 0),('\x93', 0),('\x94', 0),('\x95', 0),('\x96', 0),('\x97', 0),
+ ('\x98', 0),('\x99', 0),('\x9a', 0),('\x9b', 0),('\x9c', 0),('\x9d', 0),
+ ('\x9e', 0),('\x9f', 0),('\xa0', 0),('\xa1', 0),('\xa2', 0),('\xa3', 0),
+ ('\xa4', 0),('\xa5', 0),('\xa6', 0),('\xa7', 0),('\xa8', 0),('\xa9', 0),
+ ('\xaa', 0),('\xab', 0),('\xac', 0),('\xad', 0),('\xae', 0),('\xaf', 0),
+ ('\xb0', 0),('\xb1', 0),('\xb2', 0),('\xb3', 0),('\xb4', 0),('\xb5', 0),
+ ('\xb6', 0),('\xb7', 0),('\xb8', 0),('\xb9', 0),('\xba', 0),('\xbb', 0),
+ ('\xbc', 0),('\xbd', 0),('\xbe', 0),('\xbf', 0),('\xc0', 0),('\xc1', 0),
+ ('\xc2', 0),('\xc3', 0),('\xc4', 0),('\xc5', 0),('\xc6', 0),('\xc7', 0),
+ ('\xc8', 0),('\xc9', 0),('\xca', 0),('\xcb', 0),('\xcc', 0),('\xcd', 0),
+ ('\xce', 0),('\xcf', 0),('\xd0', 0),('\xd1', 0),('\xd2', 0),('\xd3', 0),
+ ('\xd4', 0),('\xd5', 0),('\xd6', 0),('\xd7', 0),('\xd8', 0),('\xd9', 0),
+ ('\xda', 0),('\xdb', 0),('\xdc', 0),('\xdd', 0),('\xde', 0),('\xdf', 0),
+ ('\xe0', 0),('\xe1', 0),('\xe2', 0),('\xe3', 0),('\xe4', 0),('\xe5', 0),
+ ('\xe6', 0),('\xe7', 0),('\xe8', 0),('\xe9', 0),('\xea', 0),('\xeb', 0),
+ ('\xec', 0),('\xed', 0),('\xee', 0),('\xef', 0),('\xf0', 0),('\xf1', 0),
+ ('\xf2', 0),('\xf3', 0),('\xf4', 0),('\xf5', 0),('\xf6', 0),('\xf7', 0),
+ ('\xf8', 0),('\xf9', 0),('\xfa', 0),('\xfb', 0),('\xfc', 0),('\xfd', 0),
+ ('\xfe', 0),('\xff', 0),(256, 5072),
]
+
Oops, something went wrong.

0 comments on commit d839d76

Please sign in to comment.