/
test_speeds.py
130 lines (103 loc) · 4.14 KB
/
test_speeds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
""" This script reports the time to serialize/deserialize a small object
from a bunch of different Python serialization libraries.
The data being serialized represents a single 'Tweet' from Twitter, and has
just 4 fields: text, userId, location and timestamp.
"""
import random
import time
import numpy
import gc
import os
import functools
import json
import cPickle
import pickle
import msgpack
from thrift.transport.TTransport import TMemoryBuffer
from thrift.protocol.TBinaryProtocol import TBinaryProtocolAccelerated, TBinaryProtocol
try:
    from thriftobj.Tweet.ttypes import Tweet as ThriftTweet
except ImportError:
    # The generated bindings are not importable yet: build them from
    # Tweet.thrift with the thrift compiler, then retry the import.
    import subprocess

    print("generating thrift objects")
    # check_call raises if the compiler cannot be started or exits with a
    # non-zero status, so a failed generation is reported at its source
    # instead of surfacing as a second, unexplained ImportError below.
    subprocess.check_call(["thrift", "--gen", "py", "Tweet.thrift"])
    # The compiler writes into "gen-py", which is not a valid package name;
    # rename it so it can be imported as "thriftobj".
    os.rename("gen-py", "thriftobj")
    from thriftobj.Tweet.ttypes import Tweet as ThriftTweet
class Tweet(object):
    """Plain in-memory tweet record used as the benchmark payload.

    Each constructor argument is stored as an instance attribute of the same
    name. The JSON and MessagePack helpers serialize exactly those attributes
    and rebuild an instance by passing the decoded mapping back to the
    constructor as keyword arguments.
    """

    def __init__(self, text=None, userId=None, timestamp=None, location=None):
        self.text = text
        self.userId = userId
        self.timestamp = timestamp
        self.location = location

    def toJSON(self):
        """Return the instance attributes encoded as a JSON document."""
        fields = vars(self)
        return json.dumps(fields)

    @classmethod
    def fromJSON(cls, data):
        """Build an instance from a JSON object whose keys name the fields."""
        fields = json.loads(data)
        return cls(**fields)

    def toMessagePack(self):
        """Return the instance attributes packed with MessagePack."""
        fields = vars(self)
        return msgpack.packb(fields)

    @classmethod
    def fromMessagePack(cls, data):
        """Build an instance from a MessagePack map whose keys name the fields."""
        fields = msgpack.unpackb(data)
        return cls(**fields)
def thriftDumps(tweet, ProtocolClass=TBinaryProtocolAccelerated):
    """Serialize ``tweet`` through ``ProtocolClass`` into an in-memory buffer.

    ``tweet`` must provide a ``write(protocol)`` method. The contents of the
    memory buffer after that call are returned.
    """
    transport = TMemoryBuffer()
    tweet.write(ProtocolClass(transport))
    return transport.getvalue()
def thriftLoads(data, ProtocolClass=TBinaryProtocolAccelerated):
    """Decode ``data`` into a new ``ThriftTweet`` using ``ProtocolClass``.

    ``data`` is wrapped in an in-memory buffer and a fresh ``ThriftTweet`` is
    populated from it via its ``read(protocol)`` method.
    """
    transport = TMemoryBuffer(data)
    decoded = ThriftTweet()
    decoded.read(ProtocolClass(transport))
    return decoded
# Lowercase ASCII letters 'a'..'z' that random strings are drawn from.
# Built as a real list because random.choice needs an indexable sequence;
# relying on map() for that only works where map() returns a list.
alphabet = [chr(code) for code in range(ord('a'), ord('z') + 1)]


def randomString(length):
    """Return ``length`` letters chosen independently at random from ``alphabet``.

    A ``length`` of zero (or less) yields the empty string.
    """
    return ''.join(random.choice(alphabet) for _ in range(length))
def runTests(numTweets=100000):
    """Benchmark each serialization method and record the results.

    Generates ``numTweets`` random tweets, then for every method times how
    long it takes to pack all of them and to unpack all of the packed
    payloads. Per-method timings, rates and average packed size are printed
    and finally written, sorted by ascending pack rate, to
    ``speed_data.json`` in the current directory.

    Args:
        numTweets: how many random tweets to generate. Defaults to 100000,
            the value that used to be hard-coded. Should be positive, since
            averages and rates are computed over the generated items.
    """
    print("generating data")
    data = [Tweet(randomString(random.randint(10, 140)),
                  randomString(random.randint(5, 20)),
                  int(time.time()),
                  randomString(random.randint(10, 30)))
            for _ in xrange(numTweets)]
    thriftdata = [ThriftTweet(d.text, d.userId, d.timestamp, d.location)
                  for d in data]

    # Length of the three string fields plus 8 bytes (presumably the
    # timestamp stored as a 64-bit integer - confirm).
    minSize = numpy.average([len(d.text) + len(d.userId) + len(d.location) + 8
                             for d in data])
    # Each message is printed as one pre-formatted string so the emitted text
    # is the same whether print is a statement or a function. The double
    # spaces reproduce the separator the print statement inserted after
    # string items that already end with a space.
    print("generated data, size lower bound =  %s" % (minSize,))

    # name -> (packer, unpacker, input items)
    methods = {
        'Pickle': (pickle.dumps, pickle.loads, data),
        'cPickle': (cPickle.dumps, cPickle.loads, data),
        'cPickle\n(Highest Protocol)': (
            functools.partial(cPickle.dumps, protocol=cPickle.HIGHEST_PROTOCOL),
            cPickle.loads, data),
        'JSON': (lambda d: d.toJSON(), Tweet.fromJSON, data),
        'MessagePack': (lambda d: d.toMessagePack(),
                        Tweet.fromMessagePack, data),
        'Thrift': (thriftDumps, thriftLoads, thriftdata),
    }

    output = []
    for method, (packer, unpacker, inputData) in methods.items():
        # Collect garbage up front so a collection triggered by earlier work
        # is less likely to land inside the timed region.
        gc.collect()

        startPack = time.time()
        packed = [packer(d) for d in inputData]
        startUnpack = time.time()
        # The result is bound to a name only to keep it alive until the
        # timer has been read; it is not inspected.
        unpacked = [unpacker(d) for d in packed]
        unpackTime = time.time() - startUnpack
        packTime = startUnpack - startPack

        itemCount = len(inputData)
        packRate = itemCount / packTime
        unpackRate = itemCount / unpackTime
        averageSize = numpy.average([len(d) for d in packed])

        output.append({'method': method,
                       'packTime': packTime,
                       'unpackTime': unpackTime,
                       'packRate': packRate,
                       'unpackRate': unpackRate,
                       'averageSize': averageSize})

        print("-" * 80)
        print(method)
        print("packTime %s s -  %s items/s" % (packTime, packRate))
        print("unpackTime %s s -  %s items/s" % (unpackTime, unpackRate))
        print("size %s" % (averageSize,))
        print("")
        print("")

        # Release this method's results now. Otherwise they would be freed
        # when the names are rebound in the next iteration, i.e. inside the
        # next method's timed region, inflating its measurements.
        del packed, unpacked

    output.sort(key=lambda x: x['packRate'])
    # Use a context manager so the file is flushed and closed deterministically.
    with open("speed_data.json", "wb") as resultFile:
        resultFile.write(json.dumps(output))
# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    runTests()