/
index.rb
193 lines (155 loc) · 4.72 KB
/
index.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# encoding: utf-8
#
require 'csv'
require 'sinatra/base'
require File.expand_path '../../lib/picky', __FILE__
# Gives every object a #timed_exclaim that accepts one argument, does nothing
# and returns nil.
# NOTE(review): presumably this silences a logging helper called by Picky
# while indexing — confirm against the library.
#
class Object
  def timed_exclaim(_)
  end
end
# Measures how long the given block takes to run.
#
# Garbage collection is switched off while the block runs so that GC pauses do
# not end up in the measurement. The GC state that was in effect before the
# call is restored afterwards, even when the block raises.
#
# Returns the elapsed time in seconds as a Float.
# Raises RuntimeError when called without a block.
#
def performance_of
  raise "#performance_of needs a block" unless block_given?

  # GC.disable returns true when GC was already disabled by the caller.
  gc_was_disabled = GC.disable
  begin
    # Monotonic clock: unaffected by wall-clock adjustments during the run.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    yield
    Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  ensure
    GC.enable unless gc_was_disabled
  end
end
# Sample data for the benchmark: rows of data.csv held in memory.
#
class Source
  # One CSV row: an id followed by four text columns.
  Thing = Struct.new :id, :text1, :text2, :text3, :text4

  # Maximum number of rows to load and the default number of rows to yield.
  attr_reader :amount

  def initialize amount
    @amount = amount
  end

  # Reads rows from data.csv (relative to the working directory) into the
  # buffer, stopping once `amount` rows have been loaded.
  # CSV.foreach closes the file when done, including on the early break.
  #
  def prepare
    @buffer = []
    CSV.foreach('data.csv') do |row|
      @buffer << Thing.new(*row)
      break if @buffer.size == @amount
    end
  end

  # Yields the first `up_to` buffered things (default: `amount`) to the block.
  # Requires #prepare to have been called first.
  #
  def each up_to = nil, &block
    # first(n) yields exactly n items; an inclusive 0..n range would yield n + 1.
    @buffer.first(up_to || amount).each(&block)
  end
end
# Make Picky's constants (Backends, Index, Indexes, ...) reachable without the
# Picky:: prefix for the rest of the script.
#
include Picky

# Backends under test. Each entry is
#   [description, backend instance, number of things to index].
#
backends = []
backends << ["immediate Redis", Backends::Redis.new(immediate: true), 100]
backends << ["immediate SQLite", Backends::SQLite.new(self_indexed: true), 100]
backends << ["standard Redis", Backends::Redis.new, 200]
backends << ["standard SQLite", Backends::SQLite.new, 200]
backends << ["standard File", Backends::File.new, 300]
backends << ["standard Memory", Backends::Memory.new, 500]

# Category options shared by the index definitions.
#
constant_weight = Picky::Weights::Constant.new
no_partial      = Picky::Partial::None.new
full_partial    = Picky::Partial::Postfix.new(from: 1)
double_meta     = Picky::Similarity::DoubleMetaphone.new(3)
# The four text categories every index definition declares.
#
category_names = [:text1, :text2, :text3, :text4]

# Index definitions to benchmark. Each entry is [definition block, label]:
# the block is handed to Index.new and the label is printed next to the
# results for that definition.
#
definitions = [
  [
    Proc.new { category_names.each { |name| category name, weights: constant_weight, partial: no_partial } },
    :no_weights_no_partial_default_similarity
  ],
  [
    Proc.new { category_names.each { |name| category name, weights: constant_weight } },
    :no_weights_default_partial_default_similarity
  ],
  [
    Proc.new { category_names.each { |name| category name } },
    :default_weights_default_partial_default_similarity
  ],
  [
    Proc.new { category_names.each { |name| category name, partial: full_partial } },
    :default_weights_full_partial_no_similarity
  ],
  [
    Proc.new { category_names.each { |name| category name, similarity: double_meta } },
    :default_weights_default_partial_double_metaphone_similarity
  ],
  [
    Proc.new { category_names.each { |name| category name, partial: full_partial, similarity: double_meta } },
    :default_weights_full_partial_double_metaphone_similarity
  ]
]
# Two blank lines, then a note on the unit used for the rates printed later.
#
puts "", ""
puts "All measurements in processed per second!"

# Load the sample data once up front (at most 500 rows); all benchmark runs
# read from this in-memory buffer.
#
source = Source.new(500)
source.prepare
# Resident set size of this very process as reported by `ps` (the RSS column,
# which ps reports in kilobytes). Asking for our own PID avoids picking up an
# unrelated process whose command line merely mentions this file.
#
ram = ->() do
  `ps -o rss= -p #{Process.pid}`.to_i
end

# Number of String objects ObjectSpace currently knows about.
# each_object returns the number of objects it visited.
#
string = ->() do
  ObjectSpace.each_object(String) {}
end

# The first integer in the GC::Profiler report (the count in its
# "GC <n> invokes." header line), or 0 while the report is still empty.
#
runs = ->() do
  GC::Profiler.result[/\d+/].to_i
end
# Make sure GC is on outside the timed sections and switch on the profiler,
# whose report the `runs` lambda reads for the "GC extra" column.
#
GC.enable
GC::Profiler.enable
# Benchmark every backend against every index definition: one table per
# backend, one row per definition.
#
backends.each do |backend_description, backend, amount|
puts
# Table header for this backend.
print "Running tests with #{backend_description} with #{"%5d" % amount} indexed:"
puts " add/index | dump | total RAM/string/symbols per indexed"
definitions.each do |definition, description|
# Row label: the definition's name, right-aligned to 65 characters.
print "%65s" % description
print ": "
# Start from a clean slate: drop previously registered indexes and build a
# fresh index from the definition block, with an empty source and the
# backend under test.
Indexes.clear_indexes
data = Index.new :m, &definition
data.source []
data.backend backend
# Collect garbage first, then take the baselines the deltas below are
# computed against.
GC.start
initial_ram = ram.call
initial_strings = string.call
initial_symbols = Symbol.all_symbols.size
last_gc = runs.call
# Timed: add things from the prepared source to the index one by one.
add_duration = performance_of do
source.each(amount) do |thing|
data.add thing # direct
end
end
# Snapshot memory and object counts right after adding, before the dump.
current_ram = ram.call - initial_ram
strings = string.call
symbols = Symbol.all_symbols.size
# Column "add/index": things added per second.
print "%7.0f" % (amount / add_duration)
print " | "
# Timed: dump the index.
dump_duration = performance_of do
data.dump
end
# Column "dump": things per second for the dump alone.
print "%7.0f" % (amount / dump_duration)
print " | "
# Column "total": things per second over add + dump combined.
print "%7.0f" % (amount / (add_duration + dump_duration))
print " "
# RAM growth per thing; integer division, so fractions are truncated.
print "%5d" % (current_ram / amount)
print "K "
# New String objects per thing.
print "%6.1f" % ((strings - initial_strings) / amount.to_f)
print " Strings "
# New Symbols per thing.
print "%6.1f" % ((symbols - initial_symbols) / amount.to_f)
print " Symbols "
# Difference in the profiler's GC count since the baseline was taken.
print "GC extra: "
print "%2d" % (runs.call - last_gc)
puts
# Clear the index before the next definition runs.
data.clear
end
end