-
Notifications
You must be signed in to change notification settings - Fork 208
/
FuzzySearch.ts
211 lines (183 loc) · 7.94 KB
/
FuzzySearch.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/*---------------------------------------------------------------------------------------------
* Copyright (c) Bentley Systems, Incorporated. All rights reserved.
* See LICENSE.md in the project root for license terms and full copyright notice.
*--------------------------------------------------------------------------------------------*/
/** @packageDocumentation
* @module Tools
*/
import Fuse from "fuse.js";
/** @public */
export class FuzzySearch<T> {
/** Override to provide non-standard FuseOptions for searches where the a single word pattern is used */
public onGetSingleWordSearchOptions(): Fuse.FuseOptions<T> {
return {
shouldSort: true,
threshold: 0.40,
location: 0,
distance: 100,
maxPatternLength: 32,
minMatchCharLength: 2,
includeMatches: true,
includeScore: true,
};
}
/** Override to provide non-standard FuseOptions for searches where the a multiple word pattern is used */
public onGetMultiWordSearchOptions(): Fuse.FuseOptions<T> {
return {
shouldSort: true,
threshold: 0.40,
tokenize: true,
matchAllTokens: true,
maxPatternLength: 32,
minMatchCharLength: 2,
includeMatches: true,
includeScore: true,
};
}
/** Call to conduct a fuzzy search of searchedObjects, looking at the 'key' member of each such object
* @param searchedObjects An array of objects to search.
* @param keys The name of the members to search in the searchedObjects.
* @param pattern The pattern for which each searchedObject is searched.
* @return FuzzySearchResults.
*/
public search(searchedObjects: T[], keys: Array<keyof T>, pattern: string): FuzzySearchResults<T> {
if (!pattern || pattern.length < 2)
return new FuzzySearchResults<T>(undefined);
// it is a multi-word pattern if there's a space other than at the end of the pattern.
const spaceIndex: number = pattern.indexOf(" ");
const multiWord: boolean = (-1 !== spaceIndex) && (spaceIndex !== (pattern.length - 1));
const options: Fuse.FuseOptions<T> = multiWord ? this.onGetMultiWordSearchOptions() : this.onGetSingleWordSearchOptions();
options.keys = keys;
const fuse = new Fuse(searchedObjects, options);
let results: any[] = fuse.search(pattern);
// We need to set the threshold fairly high to get results when the user misspells words (otherwise they are not returned),
// but doing that results in matches that don't make sense when there are "good" matches. So we discard matches where the match
// score increases by a large amount between results.
let checkScoreDelta: boolean = false;
let averageScoreDeltaThreshold = 1;
if (results.length > 30) {
averageScoreDeltaThreshold = ((results[results.length - 1].score - results[0].score) / results.length) * 10;
if (averageScoreDeltaThreshold > 0.01)
checkScoreDelta = true;
}
// Sometimes fuse returns results in the array where the matches array is empty. That seems like a bug to me, but it happens when
// the input is something like "fjt" and the string it matches is "fit". If we have more than three actual matches, we just truncate the set when we see one.
// The other use for this loop is to truncate when we see a dramatic increase in the score. The ones after are unlikely
// to be useful, so we truncate the results when we hit that point also.
for (let resultIndex = 0; resultIndex < results.length; resultIndex++) {
const thisResult = results[resultIndex];
if (0 === thisResult.matches.length) {
// here we have a result with no matches. If we have other matches, just discard this and the rest.
if (resultIndex > 2) {
results = results.slice(0, resultIndex);
break;
}
// otherwise we want to keep this result, but we have to add the matched value to the object because we can't get it from the matches array.
// we assume it came from the first key (usually there's only one anyway).
thisResult.matchedValue = thisResult.item[keys[0]];
thisResult.matchedKey = keys[0];
}
if (checkScoreDelta && (resultIndex > 0)) {
const resultScore = results[resultIndex].score;
if (resultScore < 0.101)
continue;
if ((resultScore - results[resultIndex - 1].score) > averageScoreDeltaThreshold) {
results = results.slice(0, resultIndex);
break;
}
}
}
// put the functions on each result so it fulfils the FuzzySearchResult interface.
for (const thisResult of results) {
thisResult.getResult = getResult.bind(thisResult);
thisResult.getBoldMask = getBoldMask.bind(thisResult);
thisResult.getMatchedKey = getMatchedKey.bind(thisResult);
thisResult.getMatchedValue = getMatchedValue.bind(thisResult);
}
return new FuzzySearchResults<T>(results);
}
}
/** Interface implemented by objects returned while iterating through FuzzySearchResults
* @public
* @extensions
*/
export interface FuzzySearchResult<T> {
/** Return the current result object */
getResult(): T;
/** Return the key found in this result object */
getMatchedKey(): string;
/** Return the value matched in this result object */
getMatchedValue(): string;
/** Return a boolean array that contains true for each letter in the matched value that was matched part of the search pattern */
getBoldMask(): boolean[];
}
/** Added to each result to support the FuzzySearchResult interface. */
function getResult(this: any) {
return this.item;
}
/** Added to each result to support the FuzzySearchResult interface. */
function getMatchedKey(this: any): string {
return (this.matches.length > 0) ? this.matches[0].key : this.matchedKey;
}
/** Added to each result to support the FuzzySearchResult interface. */
function getMatchedValue(this: any): string {
return (this.matches.length > 0) ? this.matches[0].value : this.matchedValue;
}
/** this function is added to each result to support the FuzzySearchResult interface. */
function getBoldMask(this: any): boolean[] | undefined {
if (this.boldMask)
return this.boldMask;
// if we had no matches, we return a bold mask with all false.
if (0 === this.matches.length) {
const noBoldMask = new Array<boolean>(this.matchedValue.length);
noBoldMask.fill(false);
return this.boldMask = noBoldMask;
}
// we have some matched portions.
const thisMatchedString: string = this.matches[0].value;
const valueLength = thisMatchedString.length;
const boldMask: boolean[] = new Array<boolean>(valueLength);
boldMask.fill(false);
const indicesArray: number[][] = this.matches[0].indices;
indicesArray.forEach((set: number[]) => {
for (let start = set[0], end = set[1]; start <= end; start++) {
boldMask[start] = true;
}
});
// cache it so if someone asks again we don't have to recalculate it.
return this.boldMask = boldMask;
}
/**
* This class is used to return the results of FuzzySearch.search. It is iterable, with each iteration
* returning an object implementing the FuzzySearchResult interface.
* @public
*/
export class FuzzySearchResults<T> implements Iterable<T> {
public results: any[];
constructor(results: any[] | undefined) {
this.results = [];
if (results)
this.results = results;
}
public [Symbol.iterator](): any { return new FuzzySearchResultsIterator(this); }
public get length(): number { return this.results.length; }
public getResult(resultIndex: number): FuzzySearchResult<T> | undefined {
if ((resultIndex < 0) || (resultIndex > this.results.length))
return undefined;
return this.results[resultIndex];
}
}
class FuzzySearchResultsIterator<T> {
public counter: number;
public fsr: FuzzySearchResults<T>;
constructor(fsr: FuzzySearchResults<T>) {
this.fsr = fsr;
this.counter = 0;
}
public next: any = () => {
return {
done: this.counter === this.fsr.results.length,
value: this.fsr.results[this.counter++] as FuzzySearchResult<T>,
};
};
}