Skip to content

Commit db9a2d6

Browse files
author
Ben Frederickson
committed
Make this benchmark less biased
1) Have MessagePack and JSON operate on the same data as pickle; 2) include 'HIGHEST_PROTOCOL' as an option for the pickle test.
1 parent 23282b8 commit db9a2d6

File tree

1 file changed

+25
-11
lines changed

1 file changed

+25
-11
lines changed

python-serialization/test_speeds.py

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import numpy
1111
import gc
1212
import os
13+
import functools
1314

1415
import json
1516

@@ -36,11 +37,20 @@ def __init__(self, text=None, userId=None, timestamp=None, location=None):
3637
self.timestamp = timestamp
3738
self.location = location
3839

39-
def jsonableTweet(self):
40-
return {'text' : self.text,
41-
'userId' : self.userId,
42-
'timestamp' : self.timestamp,
43-
'location' : self.location}
40+
def toJSON(self):
41+
return json.dumps(self.__dict__)
42+
43+
@classmethod
44+
def fromJSON(cls, data):
45+
return cls(**json.loads(data))
46+
47+
def toMessagePack(self):
48+
return msgpack.packb(self.__dict__)
49+
50+
@classmethod
51+
def fromMessagePack(cls, data):
52+
return cls(**msgpack.unpackb(data))
53+
4454

4555
def thriftDumps(tweet, ProtocolClass=TBinaryProtocolAccelerated):
4656
buf = TMemoryBuffer()
@@ -67,7 +77,6 @@ def runTests():
6777
randomString(random.randint(10, 30)))
6878
for x in xrange(100000)]
6979

70-
jsondata = [jsonableTweet(d) for d in data]
7180
thriftdata = [ThriftTweet(d.text, d.userId, d.timestamp, d.location) for d in data]
7281

7382
minSize = numpy.average([len(d.text) + len(d.userId) + len(d.location) + 8 for d in data])
@@ -76,15 +85,20 @@ def runTests():
7685
methods = {
7786
'Pickle' : (pickle.dumps, pickle.loads, data),
7887
'cPickle' : (cPickle.dumps, cPickle.loads, data),
79-
'JSON' : (json.dumps, json.loads, jsondata),
80-
'MessagePack' : (msgpack.packb, msgpack.unpackb, jsondata),
88+
'cPickle' : (cPickle.dumps, cPickle.loads, data),
89+
'cPickle\n(Highest Protocol)' : (
90+
functools.partial(cPickle.dumps, protocol=cPickle.HIGHEST_PROTOCOL),
91+
cPickle.loads, data),
92+
'JSON' : (lambda d: d.toJSON(), Tweet.fromJSON, data),
93+
'MessagePack' : (lambda d: d.toMessagePack(),
94+
Tweet.fromMessagePack, data),
8195
'Thrift' : (thriftDumps, thriftLoads, thriftdata),
8296
}
83-
97+
8498
output = []
8599
for method, (packer, unpacker, inputData) in methods.items():
86100
gc.collect()
87-
101+
88102
startPack = time.time()
89103
packed = [packer(d) for d in inputData]
90104

@@ -106,7 +120,7 @@ def runTests():
106120
print "packTime", packTime, "s - ", len(inputData)/packTime, "items/s"
107121
print "unpackTime", unpackTime, "s - ", len(inputData)/unpackTime, "items/s"
108122
print "size", averageSize
109-
print
123+
print
110124
print
111125

112126
output.sort(key=lambda x: x['packRate'])

0 commit comments

Comments
 (0)