-
Notifications
You must be signed in to change notification settings - Fork 109
/
perftesting.py
183 lines (154 loc) · 5.51 KB
/
perftesting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from time import time
import random
import string
from pymining.itemmining import _fpgrowth, get_fptree, _relim,\
get_relim_input, _sam, get_sam_input
from pymining.compat import range
def get_default_transactions():
'''Returns a small list of transactions. For testing purpose.'''
return (
('a', 'd'),
('a', 'c', 'd', 'e'),
('b', 'd'),
('b', 'c', 'd'),
('b', 'c'),
('a', 'b', 'd'),
('b', 'd', 'e'),
('b', 'c', 'd', 'e'),
('b', 'c'),
('a', 'b', 'd')
)
def get_default_transactions_alt():
'''Returns a small list of transactions. For testing purpose.'''
return (
('a', 'b'),
('b', 'c', 'd'),
('a', 'c', 'd', 'e'),
('a', 'd', 'e'),
('a', 'b', 'c'),
('a', 'b', 'c', 'd'),
('a'),
('a', 'b', 'c'),
('a', 'b', 'd'),
('b', 'c', 'e'),
)
def get_default_sequences():
'''Returns a small list of sequences. For testing purpose.'''
return ('caabc', 'abcb', 'cabc', 'abbca')
def get_random_transactions(
transaction_number=500,
max_item_per_transaction=100, max_key_length=50,
key_alphabet=string.ascii_letters, universe_size=1000):
'''Generates a random list of `transaction_number` transactions containing
from 0 to `max_item_per_transaction` from a collection of
`universe_size`. Each key has a maximum length of `max_key_length` and
is computed from a sequence of characters specified by `key_alphabet`
(default is ascii letters).
If `key_alphabet` is None, range(universize_size) is used as the
alphabet and `max_key_length` is ignored.
'''
if key_alphabet is None:
words = list(range(universe_size))
else:
words = []
for _ in range(universe_size):
word = ''.join((
random.choice(key_alphabet) for x in
range(random.randint(1, max_key_length))))
words.append(word)
transactions = []
for _ in range(transaction_number):
transaction = {
word for word in
random.sample(words, random.randint(0, max_item_per_transaction))
}
transactions.append(transaction)
return transactions
def test_sam(should_print=False, ts=None, support=2):
if ts is None:
ts = get_default_transactions()
sam_input = get_sam_input(ts, lambda e: e)
fis = set()
report = {}
n = _sam(sam_input, fis, report, support)
if should_print:
print(n)
print(report)
return (n, report)
def test_relim(should_print=False, ts=None, support=2):
if ts is None:
ts = get_default_transactions()
relim_input = get_relim_input(ts, lambda e: e)
fis = set()
report = {}
n = _relim(relim_input, fis, report, support)
if should_print:
print(n)
print(report)
return (n, report)
def test_fpgrowth(should_print=False, ts=None, support=2, pruning=False):
if ts is None:
ts = get_default_transactions()
fptree = get_fptree(ts, lambda e: e, support)
fis = set()
report = {}
n = _fpgrowth(fptree, fis, report, support, pruning)
if should_print:
print(n)
print(report)
return (n, report)
def test_itemset_perf(perf_round=10, sparse=True, seed=None):
'''Non-scientifically tests the performance of three algorithms by running
`perf_round` rounds of FP-Growth, FP-Growth without pruning, Relim, and
SAM.
A random set of transactions is created (the same is obviously used
for all algorithms).
If `sparse` is False, the random transactions are more dense, i.e., some
elements appear in almost all transactions.
The `seed` parameter can be used to obtain the same sample across
multiple calls.
'''
random.seed(seed)
if sparse:
universe_size = 2000
transaction_number = 500
support = 10
else:
universe_size = 110
transaction_number = 75
support = 25
transactions = get_random_transactions(
transaction_number=transaction_number,
universe_size=universe_size,
key_alphabet=None)
print('Random transactions generated with seed {0}\n'.format(seed))
start = time()
for i in range(perf_round):
(n, report) = test_fpgrowth(
False, transactions, support, pruning=True)
print('Done round {0}'.format(i))
end = time()
print('FP-Growth (pruning on) took: {0}'.format(end - start))
print('Computed {0} frequent item sets.'.format(n))
start = time()
for i in range(perf_round):
(n, report) = test_fpgrowth(
False, transactions, support, pruning=False)
print('Done round {0}'.format(i))
end = time()
print('FP-Growth (pruning off) took: {0}'.format(end - start))
print('Computed {0} frequent item sets.'.format(n))
start = time()
for i in range(perf_round):
(n, report) = test_relim(False, transactions, support)
print('Done round {0}'.format(i))
end = time()
print('Relim took: {0}'.format(end - start))
print('Computed {0} frequent item sets.'.format(n))
start = time()
for i in range(perf_round):
(n, report) = test_sam(False, transactions, support)
print('Done round {0}'.format(i))
end = time()
print('Sam took: {0}'.format(end - start))
print('Computed {0} frequent item sets.'.format(n))