-
Notifications
You must be signed in to change notification settings - Fork 32
/
search.go
252 lines (217 loc) · 6.36 KB
/
search.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
// Copyright 2015, David Howden
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package index
import (
"fmt"
"strings"
"unicode"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
func isMn(r rune) bool {
return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}
var transformer = transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
func removeNonAlphaNumeric(s string) string {
in := []rune(s)
res := make([]rune, len(in))
i := 0
for _, x := range s {
switch {
case x == '-':
res[i] = ' '
i++
case unicode.IsLetter(x), unicode.IsDigit(x), unicode.IsSpace(x):
res[i] = unicode.ToLower(x)
i++
}
}
result, _, _ := transform.String(transformer, string(res[:i]))
return result
}
// Searcher is an interface which defines the Search method.
type Searcher interface {
// Search uses the given string to filter a list of paths.
Search(string) []Path
}
// WordIndex is an interface which defines the Words method.
type WordIndex interface {
Searcher
// Words returns all the words in the index.
Words() []string
}
type wordIndex struct {
fields []string
words map[string][]Path // mapping from word -> paths
}
func (s *wordIndex) Words() []string {
words := make([]string, 0, len(s.words))
for k := range s.words {
words = append(words, k)
}
return words
}
// Search returns a list of paths for the given word.
func (s *wordIndex) Search(w string) []Path {
return s.words[w]
}
// AddWord adds the given string (assumed to be a single word) to the index.
func (s *wordIndex) AddWord(w string, p Path) {
s.words[w] = append(s.words[w], p)
}
// AddString adds the string to the index, removing non-alphanumeric characters,
// normalising modified characters, and splitting into words.
func (s *wordIndex) AddString(x string, p Path) {
x = removeNonAlphaNumeric(x)
w := strings.Fields(x)
for _, x := range w {
s.AddWord(x, p)
}
}
// Expander is an interface which implements the Expand method.
type Expander interface {
// Expand the given string into a list of alternatives.
Expand(string) []string
}
// MinPrefix is the minimum number of characters that can be used in a prefix.
const MinPrefix = 3
// PrefixMultiExpand is a type which implements Expander
type PrefixMultiExpand struct {
words map[string][]string
size int
}
// BuildPrefixMultiExpander builds an Expander with given length n. All words mapped to by prefixes will
// have length greater than or equal to MinPrefix.
func BuildPrefixMultiExpander(words []string, n int) PrefixMultiExpand {
if n < MinPrefix {
panic(fmt.Sprintf("Size must be greater than MinPrefix (%d)", MinPrefix))
}
m := make(map[string][]string)
for _, w := range words {
if len(w) >= MinPrefix {
last := n
if len(w) <= n {
last = len(w)
}
for i := MinPrefix; i <= last; i++ {
m[w[:i]] = append(m[w[:i]], w)
}
}
}
return PrefixMultiExpand{
words: m,
size: n,
}
}
// Expand uses the prefix mapping to return a list of words which can be expanded from s.
func (p PrefixMultiExpand) Expand(s string) []string {
if len(s) < MinPrefix {
return []string{s}
}
if len(s) > p.size {
// TODO: filter this with edit distance
return p.words[s[:p.size]]
}
return p.words[s]
}
// expandSearcher is an implementation of Searcher which applies the Expander to search
// input and then performs a search on each of the expanded outputs, union the results
// and returns as the Search result.
type expandSearcher struct {
Expander
Searcher
}
// Search implements Searcher, and uses the internal Expander to expand all words
// in the search expression and unions the results (deduping). Returns nil when
// the search term isn't above the MinPrefix.
func (es *expandSearcher) Search(s string) []Path {
e := es.Expander.Expand(s)
ps := make([][]Path, len(e))
for i, w := range e {
ps[i] = es.Searcher.Search(w)
}
return Union(ps...)
}
// BuildPrefixExpandSearcher constructs a prefix expander which wraps the given Searcher
// by expanding each word in the search input using the WordIndex.
func BuildPrefixExpandSearcher(s Searcher, w WordIndex, n int) Searcher {
return &expandSearcher{BuildPrefixMultiExpander(w.Words(), n), s}
}
type trackWordIndex struct {
*wordIndex
fields []string
}
// AddCollection recursively adds all Groups within the collection to the word index.
func (w *trackWordIndex) AddCollection(c Collection, p Path) {
for _, k := range c.Keys() {
np := make(Path, len(p), len(p)+1)
copy(np, p)
np = append(np, k)
w.AddGroup(c.Get(k), np)
}
}
// AddGroup adds the Group tracks to the word index, using the Path as root.
func (w *trackWordIndex) AddGroup(g Group, p Path) {
if c, ok := g.(Collection); ok {
w.AddCollection(c, p)
return
}
// for i, t := range g.Tracks() {
for _, t := range g.Tracks() {
// np := make(Path, len(p), len(p)+1)
// copy(p, np)
// np = append(np, strconv.Itoa(i))
for _, f := range w.fields {
// s.AddString(t.GetString(f), np)
w.AddString(t.GetString(f), p)
}
}
}
// BuildCollectionWordIndex creates a WordIndex using the given Collection, taking data from
// the given fields.
func BuildCollectionWordIndex(c Collection, fields []string) WordIndex {
w := &trackWordIndex{
wordIndex: &wordIndex{
words: make(map[string][]Path),
},
fields: fields,
}
w.AddGroup(c, Path([]Key{"Root"}))
return w
}
type wordSearchIntersect struct {
Searcher
min int // the minimum input string length before search returns something non-trivial.
}
func (s *wordSearchIntersect) Search(x string) []Path {
if len(x) < s.min {
return make([]Path, 0)
}
words := strings.Fields(x)
paths := make([][]Path, 0, len(words))
for _, w := range words {
if len(w) >= s.min {
paths = append(paths, s.Searcher.Search(w))
}
}
return OrderedIntersection(paths...)
}
// WordsSearchIntersect calls Search on the Searcher for each word in the input string
// and then returns the ordered intersection (Paths are ordered by the number of times
// they appear).
func WordsIntersectSearcher(s Searcher) Searcher {
return &wordSearchIntersect{
Searcher: s,
min: 3,
}
}
// FlatSearcher is a Searcher wrapper which flattens input strings (replaces any accented
// characters with their un-accented equivalents).
type FlatSearcher struct {
Searcher
}
// Search implements Searcher.
func (f FlatSearcher) Search(s string) []Path {
return f.Searcher.Search(removeNonAlphaNumeric(s))
}