-
Notifications
You must be signed in to change notification settings - Fork 9
/
demonstrate_speed.py
166 lines (116 loc) · 4.57 KB
/
demonstrate_speed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Benchmark setup: fetch two real revisions of an enwiki page and tokenize
# them; also build two "random" texts with the same token structure but
# random dictionary words, for timing the diff algorithms below.
import random
import time
import pickle
from deltas import segment_matcher, sequence_matcher
from deltas.segmenters import ParagraphsSentencesAndWhitespace
from deltas.tokenizers import wikitext_split, text_split
from mw import api

segmenter = ParagraphsSentencesAndWhitespace()
# NOTE: network access — fetches two revision texts from the enwiki API.
session = api.Session("https://en.wikipedia.org/w/api.php")
common1 = session.revisions.get(638029546, properties={"content"})['*']
common2 = session.revisions.get(638077284, properties={"content"})['*']
common1_tokens = list(wikitext_split.tokenize(common1))
common2_tokens = list(wikitext_split.tokenize(common2))
# Requires a Unix word list; not present on all platforms — TODO confirm.
words = [line.strip() for line in open('/usr/share/dict/words')]
# Replace each "word" token with a random dictionary word; keep all other
# tokens (whitespace, punctuation, markup) verbatim.
random1 = ''.join(random.choice(words) if t.type == "word" else str(t)
                  for t in common1_tokens)
# NOTE(review): built from common1_tokens, not common2_tokens — possibly a
# copy-paste slip, though it does give both random texts identical structure.
random2 = ''.join(random.choice(words) if t.type == "word" else str(t)
                  for t in common1_tokens)
random2_tokens = list(wikitext_split.tokenize(random2))
random1_tokens = list(wikitext_split.tokenize(random1))
print("Tokenizing:")


def tokenize_common():
    """Report the mean time of 50 tokenization passes over ``common1``
    for each of the two tokenizers."""
    n = 50
    t0 = time.time()
    for _ in range(n):
        list(text_split.tokenize(common1))
    print("\ttext_split: {0}".format((time.time() - t0) / n))
    t0 = time.time()
    for _ in range(n):
        list(wikitext_split.tokenize(common1))
    print("\twikitext_split: {0}".format((time.time() - t0) / n))


tokenize_common()
# profile.run('segment_common()', sort="cumulative")
print("Pickling segments:")


def segments_pickle():
    """Report mean pickle/unpickle time for a segmentation of common1_tokens.

    Each figure is wall-clock time averaged over 25 repetitions.
    """
    segments = segmenter.segment(common1_tokens)
    # Warm-up dump so the timed loop measures steady-state serialization.
    pickled_segments = pickle.dumps(segments)
    start = time.time()
    for _ in range(25):
        pickled_segments = pickle.dumps(segments)
    print("\tpickling: {0}".format((time.time() - start)/25))
    # BUG FIX: restart the clock here — previously the "unpickling" figure
    # also included the entire pickling loop above.
    start = time.time()
    for _ in range(25):
        pickle.loads(pickled_segments)
    print("\tunpickling: {0}".format((time.time() - start)/25))


segments_pickle()
# profile.run('segment_common()', sort="cumulative")
print("Running sequence matcher (LCS):")


def sequence_common():
    """Report the mean time of 25 LCS diffs between the two real revisions."""
    reps = 25
    begin = time.time()
    for _ in range(reps):
        list(sequence_matcher.diff(common1_tokens, common2_tokens))
    print("\tcommon: {0}".format((time.time() - begin) / reps))


sequence_common()
# profile.run('sequence_common()', sort="cumulative")
def sequence_random():
    """Report the mean time of 25 LCS diffs between the randomized texts."""
    begin = time.time()
    for _ in range(25):
        list(sequence_matcher.diff(random1_tokens, random2_tokens))
    print("\trandom: {0}".format((time.time() - begin) / 25))


# Disabled by default: LCS on randomized input is pathologically slow.
# sequence_random()
# profile.run('sequence_random()', sort="cumulative")
print("Segmenting:")


def segment_common():
    """Report the mean time of 25 segmentation passes over common1_tokens."""
    reps = 25
    stamp = time.time()
    for _ in range(reps):
        list(segmenter.segment(common1_tokens))
    print("\tcommon: {0}".format((time.time() - stamp) / reps))


segment_common()
# profile.run('segment_common()', sort="cumulative")
print("Running segment matcher:")


def segment_matcher_common():
    """Report the mean time of 25 segment-matcher diffs between the two
    real revisions.

    Renamed from ``segment_common`` — the original redefinition shadowed
    the segmentation benchmark of the same name defined earlier in this
    script, which was confusing when profiling by name.
    """
    start = time.time()
    for _ in range(25):
        list(segment_matcher.diff(common1_tokens, common2_tokens))
    print("\tcommon: {0}".format((time.time() - start)/25))


segment_matcher_common()
# profile.run('segment_matcher_common()', sort="cumulative")
def segment_common_fast():
    """Report the mean per-call time of the streaming processor API
    (50 ``process()`` calls total, construction included in the clock)."""
    started = time.time()
    matcher = segment_matcher.SegmentMatcher()
    proc = matcher.processor()
    for _ in range(25):
        # Two passes per iteration, hence the divide-by-50 below.
        list(proc.process(common1))
        list(proc.process(common2))
    print("\tcommon_fast: {0}".format((time.time() - started) / 50))


segment_common_fast()
# profile.run('segment_common()', sort="cumulative")
def segment_random():
    """Report the mean time of 25 segment-matcher diffs between the
    randomized token lists."""
    t_start = time.time()
    for _ in range(25):
        list(segment_matcher.diff(random1_tokens, random2_tokens))
    print("\trandom: {0}".format((time.time() - t_start) / 25))


# segment_random()
# profile.run('segment_random()', sort="cumulative")
# Pre-segment all four token lists so the matcher can be timed below
# without paying the segmentation cost inside the loop.
common1_segments, common2_segments, random1_segments, random2_segments = [
    segmenter.segment(tokens)
    for tokens in (common1_tokens, common2_tokens,
                   random1_tokens, random2_tokens)
]
print("Running segment matcher (post segmentation):")


def segment_common_seg():
    """Report the mean time of 25 diffs over the pre-segmented real
    revisions (segmentation cost excluded)."""
    reps = 25
    clock = time.time()
    for _ in range(reps):
        list(segment_matcher.diff_segments(common1_segments,
                                           common2_segments))
    print("\tcommon: {0}".format((time.time() - clock) / reps))


segment_common_seg()
# profile.run('segment_common_seg()', sort="cumulative")
def segment_random_seg():
    """Report the mean time of 25 diffs over the pre-segmented randomized
    texts (segmentation cost excluded)."""
    clock = time.time()
    for _ in range(25):
        list(segment_matcher.diff_segments(random1_segments,
                                           random2_segments))
    print("\trandom: {0}".format((time.time() - clock) / 25))


# segment_random_seg()
# profile.run('segment_random()', sort="cumulative")