/
substring.rb
118 lines (98 loc) · 2.83 KB
/
substring.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
module Cacher
module Partial
# Generates the right substrings for use in the substring strategy.
#
# A generator is configured with two values:
#   * from: handed unchanged to the token's own #each_subtoken.
#   * to:   end index used to cut the token (token[0..to]) before its
#           subtokens are generated. A value of zero means "do not cut".
#
class SubstringGenerator

  attr_reader :from, :to

  # Stores the from/to configuration for this generator.
  #
  def initialize from, to
    @from, @to = from, to
  end

  # Yields each subtoken of the given token to the block.
  #
  # The actual subtoken generation is delegated to token#each_subtoken
  # (defined outside this class); this method only decides whether the
  # token is cut down to token[0..to] first.
  #
  # Note: This is deliberately a plain instance method that checks @to on
  # each call. Defining it with a nested def inside #initialize would
  # (re)define it on the class itself, so the most recently created
  # generator would silently change the behavior of all other generators.
  #
  def each_subtoken token, &block
    if @to.zero?
      # No cutting requested: use the token as is.
      token.each_subtoken @from, &block
    else
      # Cut first, then turn the resulting string back into a symbol
      # so that it responds to #each_subtoken again.
      token[0..@to].intern.each_subtoken @from, &block
    end
  end

end
# The subtoken partial strategy.
#
# If given "florian"
# it will index "floria", "flori", "flor", "flo", "fl", "f"
# (Depending on what the given from value is, the example is with option from: 1)
#
class Substring < Strategy

  # Options:
  #   * from: Where in the symbol generating the subtokens starts (default 1).
  #   * to:   Where the symbol is cut before generating (default -1).
  #
  # Examples:
  #
  # With :hello, and to: -1 (default)
  # * from: 1 # => [:hello, :hell, :hel, :he, :h]
  # * from: 4 # => [:hello, :hell]
  #
  # With :hello, and to: -2
  # * from: 1 # => [:hell, :hel, :he, :h]
  # * from: 4 # => [:hell]
  #
  def initialize options = {}
    from = options[:from] || 1
    to   = options[:to]   || -1
    @generator = SubstringGenerator.new from, to
  end

  # Delegator to generator#from.
  #
  def from
    @generator.from
  end

  # Delegator to generator#to.
  #
  def to
    @generator.to
  end

  # Generates a partial index from the given index.
  #
  # The given index maps tokens to arrays of ids. The returned hash maps
  # each generated subtoken to the (duplicate free) ids of all tokens it
  # was generated from. The given index itself is not modified.
  #
  def generate_from index
    result = {}

    # Generate for each key token the subtokens.
    #
    i = 0
    index.each_key do |token|
      # Progress output, once per 5000 processed tokens.
      #
      i += 1
      if i == 5000
        timed_exclaim "Generating partial tokens for token #{token}. This appears every 5000 tokens."
        i = 0
      end
      generate_for token, index, result
    end

    # Remove duplicate ids.
    #
    # THINK If it is unique for a subtoken, it is
    #       unique for all derived longer tokens.
    #
    # Note: uniq! returns nil if nothing changed, so its
    # return value is deliberately not used here.
    #
    result.each_value(&:uniq!)

    result
  end

  private

    # To each shortened token of :test
    #   :test, :tes, :te, :t
    # add all ids of :test
    #
    # "token" here means just text.
    #
    # THINK Could be improved by appending the aforegoing ids?
    #
    def generate_for token, index, result
      # The ids are the same for every subtoken, so look them up once.
      #
      ids = index[token]

      @generator.each_subtoken(token) do |subtoken|
        if (collected = result[subtoken])
          # The stored array is our own copy (see dup below), so it can
          # be extended in place instead of allocating a new array.
          #
          collected.concat ids
        else
          # Copy, so the arrays of the original index are never touched.
          #
          result[subtoken] = ids.dup
        end
      end
    end

end
end
end