1010import numpy
1111import gc
1212import os
13+ import functools
1314
1415import json
1516
@@ -36,11 +37,20 @@ def __init__(self, text=None, userId=None, timestamp=None, location=None):
3637 self .timestamp = timestamp
3738 self .location = location
3839
39- def jsonableTweet (self ):
40- return {'text' : self .text ,
41- 'userId' : self .userId ,
42- 'timestamp' : self .timestamp ,
43- 'location' : self .location }
40+ def toJSON (self ):
41+ return json .dumps (self .__dict__ )
42+
43+ @classmethod
44+ def fromJSON (cls , data ):
45+ return cls (** json .loads (data ))
46+
47+ def toMessagePack (self ):
48+ return msgpack .packb (self .__dict__ )
49+
50+ @classmethod
51+ def fromMessagePack (cls , data ):
52+ return cls (** msgpack .unpackb (data ))
53+
4454
4555def thriftDumps (tweet , ProtocolClass = TBinaryProtocolAccelerated ):
4656 buf = TMemoryBuffer ()
@@ -67,7 +77,6 @@ def runTests():
6777 randomString (random .randint (10 , 30 )))
6878 for x in xrange (100000 )]
6979
70- jsondata = [jsonableTweet (d ) for d in data ]
7180 thriftdata = [ThriftTweet (d .text , d .userId , d .timestamp , d .location ) for d in data ]
7281
7382 minSize = numpy .average ([len (d .text ) + len (d .userId ) + len (d .location ) + 8 for d in data ])
@@ -76,15 +85,20 @@ def runTests():
7685 methods = {
7786 'Pickle' : (pickle .dumps , pickle .loads , data ),
7887 'cPickle' : (cPickle .dumps , cPickle .loads , data ),
79- 'JSON' : (json .dumps , json .loads , jsondata ),
80- 'MessagePack' : (msgpack .packb , msgpack .unpackb , jsondata ),
88+ 'cPickle' : (cPickle .dumps , cPickle .loads , data ),
89+ 'cPickle\n (Highest Protocol)' : (
90+ functools .partial (cPickle .dumps , protocol = cPickle .HIGHEST_PROTOCOL ),
91+ cPickle .loads , data ),
92+ 'JSON' : (lambda d : d .toJSON (), Tweet .fromJSON , data ),
93+ 'MessagePack' : (lambda d : d .toMessagePack (),
94+ Tweet .fromMessagePack , data ),
8195 'Thrift' : (thriftDumps , thriftLoads , thriftdata ),
8296 }
83-
97+
8498 output = []
8599 for method , (packer , unpacker , inputData ) in methods .items ():
86100 gc .collect ()
87-
101+
88102 startPack = time .time ()
89103 packed = [packer (d ) for d in inputData ]
90104
@@ -106,7 +120,7 @@ def runTests():
106120 print "packTime" , packTime , "s - " , len (inputData )/ packTime , "items/s"
107121 print "unpackTime" , unpackTime , "s - " , len (inputData )/ unpackTime , "items/s"
108122 print "size" , averageSize
109- print
123+ print
110124 print
111125
112126 output .sort (key = lambda x : x ['packRate' ])
0 commit comments