Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Make this benchmark less biased

1) Have MessagePack and JSON serialize the same Tweet objects that pickle does, instead of pre-converted dicts

2) Include cPickle with 'HIGHEST_PROTOCOL' as an additional option in the pickle tests
commit db9a2d6654b3f75ac0c4f9c4bb84ce4e2f1c17f6 1 parent 23282b8
@benfred authored
Showing with 25 additions and 11 deletions.
  1. +25 −11 python-serialization/test_speeds.py
View
36 python-serialization/test_speeds.py
@@ -10,6 +10,7 @@
import numpy
import gc
import os
+import functools
import json
@@ -36,11 +37,20 @@ def __init__(self, text=None, userId=None, timestamp=None, location=None):
self.timestamp = timestamp
self.location = location
-def jsonableTweet(self):
- return {'text' : self.text,
- 'userId' : self.userId,
- 'timestamp' : self.timestamp,
- 'location' : self.location}
+ def toJSON(self):
+ return json.dumps(self.__dict__)
+
+ @classmethod
+ def fromJSON(cls, data):
+ return cls(**json.loads(data))
+
+ def toMessagePack(self):
+ return msgpack.packb(self.__dict__)
+
+ @classmethod
+ def fromMessagePack(cls, data):
+ return cls(**msgpack.unpackb(data))
+
def thriftDumps(tweet, ProtocolClass=TBinaryProtocolAccelerated):
buf = TMemoryBuffer()
@@ -67,7 +77,6 @@ def runTests():
randomString(random.randint(10, 30)))
for x in xrange(100000)]
- jsondata = [jsonableTweet(d) for d in data]
thriftdata = [ThriftTweet(d.text, d.userId, d.timestamp, d.location) for d in data]
minSize = numpy.average([len(d.text) + len(d.userId) + len(d.location) + 8 for d in data])
@@ -76,15 +85,20 @@ def runTests():
methods = {
'Pickle' : (pickle.dumps, pickle.loads, data),
'cPickle' : (cPickle.dumps, cPickle.loads, data),
- 'JSON' : (json.dumps, json.loads, jsondata),
- 'MessagePack' : (msgpack.packb, msgpack.unpackb, jsondata),
+ 'cPickle' : (cPickle.dumps, cPickle.loads, data),
+ 'cPickle\n(Highest Protocol)' : (
+ functools.partial(cPickle.dumps, protocol=cPickle.HIGHEST_PROTOCOL),
+ cPickle.loads, data),
+ 'JSON' : (lambda d: d.toJSON(), Tweet.fromJSON, data),
+ 'MessagePack' : (lambda d: d.toMessagePack(),
+ Tweet.fromMessagePack, data),
'Thrift' : (thriftDumps, thriftLoads, thriftdata),
}
-
+
output = []
for method, (packer, unpacker, inputData) in methods.items():
gc.collect()
-
+
startPack = time.time()
packed = [packer(d) for d in inputData]
@@ -106,7 +120,7 @@ def runTests():
print "packTime", packTime, "s - ", len(inputData)/packTime, "items/s"
print "unpackTime", unpackTime, "s - ", len(inputData)/unpackTime, "items/s"
print "size", averageSize
- print
+ print
print
output.sort(key=lambda x: x['packRate'])
Please sign in to comment.
Something went wrong with that request. Please try again.