Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions src/common/fuzzy_search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import * as assert from 'assert';

/**
 * Classifies one span produced when a value string is compared against a query string.
 * The members are numeric and numbered from zero in declaration order.
 */
export enum StringMatchType {
    /** A stretch that is present in both the value string and the query string. */
    MATCHED = 0,

    /** A stretch of the value string that was skipped over (not matched). */
    UNMATCHED = 1,

    /** A stretch of the query string that was not matched; its presence probably signals an error. */
    ERROR_UNMATCHED_QUERY = 2,
}
/** A single span: its classification followed by the text it covers. */
export type SpanifyResult = [StringMatchType, String];

/** A list of spans, as filled in by a matcher's spanify(). */
export type SpanifyResults = SpanifyResult[];


/** Contract shared by every string matching algorithm. */
export interface IStringMatcher {
    /**
     * Tests a value string against a query string.
     *
     * @param query The query string (eg. "fzsrch")
     * @param value The value being checked (eg. "fuzzy search")
     * @returns true iff `value` matches `query` under the implementing algorithm.
     */
    match(query: String, value: String): boolean;

    /**
     * Breaks the comparison into spans (sub-strings / matched tokens) so that the way match()
     * treats the pair can be visualized. Assumes match() has already been called to decide
     * whether the string matches at all.
     *
     * @param query The query string (eg. "fzsrch")
     * @param value The value being checked (eg. "fuzzy search")
     * @param results A caller-owned, reusable array that receives the spans. Its length is reset
     *                to zero and spans are then appended. A fresh array is deliberately not
     *                returned, to keep allocations (and gc pressure) down.
     * @returns true iff everything matched, false otherwise.
     */
    spanify(query: String, value: String, results: SpanifyResults): boolean;

    /**
     * Convenience factory, since spelling the array type out by hand is annoying.
     *
     * @returns a new, empty SpanifyResults (ie. a new Array<[StringMatchType, String]>()).
     */
    newSpanifyResults(): SpanifyResults;
}

/** Rates a string with a numeric priority. */
export interface IStringPrioritizer {
    /**
     * @param value The string to rate.
     * @returns The priority assigned to `value`.
     *          NOTE(review): whether a lower or a higher number ranks first is not defined here;
     *          confirm against the implementers.
     */
    priority(value: String): Number;
}
/** Sorting strategy driven by an IStringPrioritizer. */
export interface IStringSorter {
    /**
     * @param values The input to sort.
     *               NOTE(review): typed as a single String; presumably a collection of strings
     *               was intended. Confirm before relying on it.
     * @param prioritizer Supplies the priority used for ordering.
     *
     * NOTE(review): no return type is declared, so the result is implicitly `any` (rejected
     * under `noImplicitAny`). Declare one once the intended result is settled.
     */
    sort(values: String, prioritizer: IStringPrioritizer);
}

/**
 * Bundles a sorter, an optional matcher and a span buffer.
 * The class only stores these collaborators; it defines no behaviour of its own yet.
 */
export class StringSearchFilterer {
    /** Span buffer; null on construction. */
    private spanResults: SpanifyResults | null = null;

    /**
     * @param sorter Stored unchanged.
     * @param filterer Stored unchanged; may be null.
     */
    constructor(
        private sorter: IStringSorter,
        private filterer: IStringMatcher | null
    ) {}
}





/**
 * Fuzzy matcher: a query matches a value when every query character occurs in the value, in the
 * same order, though not necessarily adjacent (ie. the query is a subsequence of the value).
 *
 * @example
 * ```typescript
 * const fuzzy = new FuzzyMatcher();
 * assert(fuzzy.match("fzy", "fuzzy"));
 * assert(!fuzzy.match("fyz", "fuzzy"));
 * ```
 * @example
 * ```typescript
 * const results = fuzzy.newSpanifyResults();
 * fuzzy.spanify("fzy", "fuzzy", results);
 * console.log('Matching "fuzzy" against "fzy"');
 * results.forEach(function (result) {
 *     const match = result[0], substring = result[1];
 *     console.log('' + match + ': "' + substring + '"');
 * });
 * ```
 */
export class FuzzyMatcher implements IStringMatcher {
    /**
     * Checks whether `query` is a subsequence of `value`.
     *
     * Both strings are walked from their ends towards their starts; the walk gives up as soon
     * as fewer value characters remain than query characters.
     *
     * @param query The query string (eg. "fzsrch")
     * @param value The value being checked (eg. "fuzzy search")
     * @returns true iff every query character was found, in order. An empty query always matches.
     */
    match(query: String, value: String): boolean {
        let i = value.length;
        let j = query.length;
        while (j !== 0 && i >= j) {
            if (value[i - 1] === query[j - 1]) {
                --j;
            }
            --i;
        }
        return j === 0;
    }

    /**
     * Splits the comparison into spans and always agrees with match() on the verdict.
     *
     * - When the pair matches, the spans reproduce the alignment match() found: only MATCHED and
     *   UNMATCHED spans are emitted and, concatenated, they spell out `value`.
     * - When it does not match, a best-effort left-to-right alignment is emitted instead; query
     *   text without a counterpart is reported as ERROR_UNMATCHED_QUERY.
     *
     * Empty spans are never emitted.
     *
     * @param query The query string (eg. "fzsrch")
     * @param value The value being checked (eg. "fuzzy search")
     * @param results Caller-owned buffer; cleared first, then filled in reading order.
     * @returns true iff `query` matches `value` (same result as match()).
     */
    spanify(query: String, value: String, results: SpanifyResults): boolean {
        results.length = 0;
        if (this.match(query, value)) {
            this.spanifyMatch(query, value, results);
            return true;
        }
        this.spanifyMismatch(query, value, results);
        return false;
    }

    /** @returns a new, empty span buffer. */
    newSpanifyResults(): SpanifyResults {
        return [];
    }

    /** Emits the spans of a successful match, using the same right-to-left alignment as match(). */
    private spanifyMatch(query: String, value: String, results: SpanifyResults): void {
        let i = value.length;
        let j = query.length;
        while (i > 0) {
            const end = i;
            if (j > 0 && value[i - 1] === query[j - 1]) {
                // Extend the run for as long as both strings keep agreeing.
                do {
                    --i;
                    --j;
                } while (i > 0 && j > 0 && value[i - 1] === query[j - 1]);
                results.push([StringMatchType.MATCHED, value.slice(i, end)]);
            } else {
                // Skip value characters until one lines up with the pending query character
                // (or, once the query is used up, until the start of the value).
                do {
                    --i;
                } while (i > 0 && !(j > 0 && value[i - 1] === query[j - 1]));
                results.push([StringMatchType.UNMATCHED, value.slice(i, end)]);
            }
        }
        // Spans were collected back to front; flip the buffer in place into reading order.
        results.reverse();
    }

    /**
     * Emits a best-effort alignment for a pair that does not match.
     * Worst case is O(value.length * query.length) because of the look-ahead searches.
     */
    private spanifyMismatch(query: String, value: String, results: SpanifyResults): void {
        const n = value.length;
        const m = query.length;
        let i = 0;
        let j = 0;
        while (i < n && j < m) {
            if (value[i] === query[j]) {
                const start = i;
                do {
                    ++i;
                    ++j;
                } while (i < n && j < m && value[i] === query[j]);
                results.push([StringMatchType.MATCHED, value.slice(start, i)]);
                continue;
            }
            const resume = query.indexOf(value.charAt(i), j);
            if (resume !== -1) {
                // The current value character appears later in the query, so the query text
                // in front of it has nothing to pair with.
                results.push([StringMatchType.ERROR_UNMATCHED_QUERY, query.slice(j, resume)]);
                j = resume;
            } else {
                // The rest of the query cannot use this value character; skip every value
                // character for which that holds.
                const start = i;
                do {
                    ++i;
                } while (i < n && query.indexOf(value.charAt(i), j) === -1);
                results.push([StringMatchType.UNMATCHED, value.slice(start, i)]);
            }
        }
        // Whatever is left on either side had nothing to be compared against.
        if (i < n) {
            results.push([StringMatchType.UNMATCHED, value.slice(i)]);
        }
        if (j < m) {
            results.push([StringMatchType.ERROR_UNMATCHED_QUERY, query.slice(j)]);
        }
    }
}

/**
 * Matcher that accepts a value when the query occurs in it as one contiguous substring.
 * An empty query matches every value.
 */
export class SubstringMatcher implements IStringMatcher {
    /**
     * @param query The text to look for.
     * @param value The text to look in.
     * @returns true iff `query` occurs somewhere inside `value`.
     */
    match(query: String, value: String): boolean {
        return value.indexOf(query.toString()) !== -1;
    }

    /**
     * Splits `value` around the first occurrence of `query`.
     *
     * On a match the spans are: the text before the hit (UNMATCHED), the hit itself (MATCHED)
     * and the text after it (UNMATCHED). On a miss the whole query is reported as
     * ERROR_UNMATCHED_QUERY, followed by the whole value as UNMATCHED. Empty spans are left out.
     *
     * @param query The text to look for.
     * @param value The text to look in.
     * @param results Caller-owned buffer; cleared first, then filled.
     * @returns true iff `query` occurs in `value` (same result as match()).
     */
    spanify(query: String, value: String, results: SpanifyResults): boolean {
        results.length = 0;
        const needle = query.toString();
        const at = value.indexOf(needle);

        if (at === -1) {
            // An empty needle is always found, so the needle is non-empty here.
            results.push([StringMatchType.ERROR_UNMATCHED_QUERY, needle]);
            if (value.length > 0) {
                results.push([StringMatchType.UNMATCHED, value.toString()]);
            }
            return false;
        }

        const end = at + needle.length;
        if (at > 0) {
            results.push([StringMatchType.UNMATCHED, value.slice(0, at)]);
        }
        if (needle.length > 0) {
            results.push([StringMatchType.MATCHED, value.slice(at, end)]);
        }
        if (end < value.length) {
            results.push([StringMatchType.UNMATCHED, value.slice(end)]);
        }
        return true;
    }

    /** @returns a new, empty span buffer. */
    newSpanifyResults(): SpanifyResults {
        return [];
    }
}


90 changes: 90 additions & 0 deletions src/common/matcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

/**
 * A matcher that holds on to a data set and a current query and hands out results on request.
 *
 * @typeParam T Element type of the data set.
 */
export interface IStatefulStringMatcher<T> {
    /** Replaces the data set the matcher works on. */
    setData(values: Array<T>): void;

    /** Replaces the query the data set is evaluated against. */
    setQuery(query: String): void;

    /** @returns the number of results that can be taken. */
    length(): number;

    /**
     * @param count Maximum number of results wanted.
     * @returns results taken from the front of the result list.
     */
    take(count: number): Array<T>;

    /**
     * @param start Position in the result list.
     * @returns the single result at that position.
     */
    takeNext(start: number): T;

    /**
     * @param start Position of the first result wanted.
     * @param count Maximum number of results wanted.
     * @returns results beginning at `start`.
     */
    takeSlice(start: number, count: number): Array<T>;
}

/** Base shape for items that carry a mutable numeric priority; a new instance starts at 0. */
class Prioritizable {
    priority = 0;
}

/**
 * Stateful matcher that scores every item against the current query and serves the items in
 * ascending priority order. Meant for edit distance algorithms such as Levenshtein, where a
 * smaller score means a closer match.
 *
 * Scoring and sorting are lazy: they happen on the first take*() call after the data or the
 * query changed. Note that they work in place: each item's `priority` field is overwritten and
 * the array handed to setData() / the constructor is itself reordered.
 *
 * @typeParam T Item type; must expose a writable numeric `priority`.
 */
export class SortingStringMatcher<T extends Prioritizable> implements IStatefulStringMatcher<T> {
    private values: Array<T> = [];
    private count = 0;
    /** Current query; empty until setQuery() is called. */
    private query = "";
    /** True while `values` still has to be (re)scored and (re)sorted. */
    private dirty = true;
    private prioritize: (value: T, query: string) => number;

    /**
     * @param values Items to rank (kept by reference, see the class notes).
     * @param prioritize Scores one item against the query; smaller scores are served first.
     */
    constructor(values: Array<T>, prioritize: (value: T, query: string) => number) {
        this.prioritize = prioritize;
        this.setData(values);
    }

    /** Replaces the items and schedules a re-rank. */
    setData(values: Array<T>): void {
        this.values = values;
        this.count = this.values.length;
        this.dirty = true;
    }

    /** Replaces the query and schedules a re-rank. A missing query is treated as empty. */
    setQuery(query: String): void {
        this.query = query == null ? "" : query.toString();
        this.dirty = true;
    }

    /** Re-scores and re-sorts the items if anything changed since the last time. */
    private update(): void {
        if (!this.dirty) {
            return;
        }
        for (const item of this.values) {
            item.priority = this.prioritize(item, this.query);
        }
        this.values.sort((a, b) => a.priority - b.priority);
        // Only cleared once ranking succeeded, so a throwing prioritizer does not leave a
        // half-ranked list marked as up to date.
        this.dirty = false;
    }

    /** @returns the number of items, as counted when the data was last set. */
    length(): number {
        return this.count;
    }

    /**
     * @param count Maximum number of items wanted.
     * @returns the best-ranked items, at most `count` of them (a new array).
     */
    take(count: number): Array<T> {
        return this.takeSlice(0, count);
    }

    /**
     * @param index Zero-based rank of the item wanted.
     * @returns the item at that rank.
     * @throws RangeError when `index` is not an integer in [0, length()).
     */
    takeNext(index: number): T {
        this.update();
        if (!Number.isInteger(index) || index < 0 || index >= this.length()) {
            throw new RangeError(`${index} is outside [0, ${this.length()})`);
        }
        return this.values[index];
    }

    /**
     * @param start Zero-based rank of the first item wanted; negative values are treated as 0.
     * @param count Maximum number of items wanted; negative values are treated as 0.
     * @returns the items ranked start .. start + count - 1, cut off at the end of the list
     *          (a new array, possibly empty).
     */
    takeSlice(start: number, count: number): Array<T> {
        this.update();
        const begin = Math.max(0, start);
        // Array.prototype.slice expects an END index, not a count.
        const end = Math.min(this.length(), begin + Math.max(0, count));
        return this.values.slice(begin, end);
    }
}

/**
 * Stateful matcher for pure filtering algorithms such as a strict ordered subset search
 * (sublime-like): items either pass the current query or are dropped; survivors keep the order
 * they had in the data set.
 *
 * Filtering is lazy: it happens on the first read after the data or the query changed. The
 * array handed to setData() / the constructor is never modified.
 *
 * Both constructor arguments are optional so that `new FilteringStringMatcher()` keeps working;
 * without a predicate every item passes.
 *
 * @typeParam T Item type.
 */
export class FilteringStringMatcher<T extends Prioritizable> implements IStatefulStringMatcher<T> {
    private values: Array<T> = [];
    /** Items that passed the predicate for the current query; valid only while `dirty` is false. */
    private accepted: Array<T> = [];
    /** Current query; empty until setQuery() is called. */
    private query = "";
    private dirty = true;
    private accepts: (value: T, query: string) => boolean;

    /**
     * @param values Items to filter.
     * @param accepts Decides whether one item passes the query.
     */
    constructor(values: Array<T> = [], accepts: (value: T, query: string) => boolean = () => true) {
        this.accepts = accepts;
        this.setData(values);
    }

    /** Replaces the items and schedules a re-filter. */
    setData(values: Array<T>): void {
        this.values = values;
        this.dirty = true;
    }

    /** Replaces the query and schedules a re-filter. A missing query is treated as empty. */
    setQuery(query: String): void {
        this.query = query == null ? "" : query.toString();
        this.dirty = true;
    }

    /** Re-runs the predicate over the items if anything changed since the last time. */
    private update(): void {
        if (!this.dirty) {
            return;
        }
        this.accepted = this.values.filter((item) => this.accepts(item, this.query));
        this.dirty = false;
    }

    /** @returns the number of items that pass the current query. */
    length(): number {
        this.update();
        return this.accepted.length;
    }

    /**
     * @param count Maximum number of items wanted.
     * @returns the first passing items, at most `count` of them (a new array).
     */
    take(count: number): Array<T> {
        return this.takeSlice(0, count);
    }

    /**
     * @param index Zero-based position among the passing items.
     * @returns the passing item at that position.
     * @throws RangeError when `index` is not an integer in [0, length()).
     */
    takeNext(index: number): T {
        const available = this.length();
        if (!Number.isInteger(index) || index < 0 || index >= available) {
            throw new RangeError(`${index} is outside [0, ${available})`);
        }
        return this.accepted[index];
    }

    /**
     * @param start Zero-based position of the first passing item wanted; negative values are treated as 0.
     * @param count Maximum number of items wanted; negative values are treated as 0.
     * @returns the passing items start .. start + count - 1, cut off at the end of the list
     *          (a new array, possibly empty).
     */
    takeSlice(start: number, count: number): Array<T> {
        const available = this.length();
        const begin = Math.max(0, start);
        const end = Math.min(available, begin + Math.max(0, count));
        return this.accepted.slice(begin, end);
    }
}





/**
 * Holder for a list of values plus a function mapping a value to a number.
 *
 * NOTE(review): this class has no constructor and no methods yet, so nothing assigns or reads
 * these fields; the intended behaviour still has to be defined.
 *
 * @typeParam T Element type of the stored values.
 */
export class StatefulStringMatcher<T> {
    /** Stored values; starts out empty. */
    private values: Array<T> = [];

    /** Maps one value to a number; unset until something assigns it. */
    private mappingFunction?: (value: T) => number;
}









94 changes: 94 additions & 0 deletions test/common/fuzzy_search.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { FuzzyMatcher, StringMatchType, SpanifyResult, SpanifyResults } from '../../src/common/fuzzy_search';

// Specification for FuzzyMatcher: match() verdicts first, then the spans produced by spanify().
describe("FuzzyMatcher tests", () => {
    const fuzzy = new FuzzyMatcher();

    // One buffer is shared by every spanify() test; spanify() is expected to clear it itself.
    const results: SpanifyResults = fuzzy.newSpanifyResults();

    /** [query, value, expected verdict] */
    type Verdict = [string, string, boolean];

    /** Asserts that the shared buffer holds exactly `expected`: first the count, then span by span. */
    const expectSpans = (expected: SpanifyResult[]): void => {
        expect(results.length).toEqual(expected.length);
        expected.forEach((span, index) => {
            expect(results[index][0]).toEqual(span[0]);
            expect(results[index][1]).toEqual(span[1]);
        });
    };

    test("It won't bother null checks, since typescript should be capable of handling that for us", () => {
        // Do nothing
    });

    test(".match() should handle input edgecases", () => {
        const cases: Verdict[] = [
            ["", "", true],
            ["asdf", "", false],
            ["", "hello!", true],
        ];
        for (const [query, value, verdict] of cases) {
            expect(fuzzy.match(query, value)).toEqual(verdict);
        }
    });

    test(".match() should handle input as expected", () => {
        const cases: Verdict[] = [
            ["a", "a", true],
            ["a", "ab", true],
            ["b", "ab", true],
            ["ab", "a", false],
            ["ab", "b", false],
            ["fzy src", "fuzzy search", true],
            ["fuzzy search", "fzy src", false],
        ];
        for (const [query, value, verdict] of cases) {
            expect(fuzzy.match(query, value)).toEqual(verdict);
        }
    });

    test(".spanify() should behave the same as .match()", () => {
        const cases: Verdict[] = [
            ["", "", true],
            ["asdf", "", false],
            ["", "hello!", true],
            ["a", "a", true],
            ["a", "ab", true],
            ["b", "ab", true],
            ["ab", "a", false],
            ["ab", "b", false],
            ["fzy src", "fuzzy search", true],
            ["fzzy src", "asdf fuzzy search", true],
            ["fuzzy search", "fzy src", false],
        ];
        for (const [query, value, verdict] of cases) {
            expect(fuzzy.spanify(query, value, results)).toEqual(verdict);
        }
    });

    test(".spanify() should return with no results when given empty arguments", () => {
        expect(fuzzy.spanify("", "", results)).toEqual(true);
        expectSpans([]);
    });

    test(".spanify() should return with one matching result when arguments match exactly", () => {
        expect(fuzzy.spanify("foo", "foo", results)).toEqual(true);
        expectSpans([[StringMatchType.MATCHED, "foo"]]);
    });

    test(".spanify() should return with one unmatched / skipped result when query string is empty", () => {
        expect(fuzzy.spanify("", "foo", results)).toEqual(true);
        expectSpans([[StringMatchType.UNMATCHED, "foo"]]);
    });

    test(".spanify() should return with one unmatched querry result (error) when value string is empty", () => {
        expect(fuzzy.spanify("foo", "", results)).toEqual(false);
        expectSpans([[StringMatchType.ERROR_UNMATCHED_QUERY, "foo"]]);
    });

    test(".spanify() should return with the correct results when invoked on a correct match", () => {
        expect(fuzzy.spanify("fzzy src", "asdf fuzzy search", results)).toEqual(true);
        expectSpans([
            [StringMatchType.UNMATCHED, "asdf "],
            [StringMatchType.MATCHED, "f"],
            [StringMatchType.UNMATCHED, "u"],
            [StringMatchType.MATCHED, "zzy s"],
            [StringMatchType.UNMATCHED, "ea"],
            [StringMatchType.MATCHED, "rc"],
            [StringMatchType.UNMATCHED, "h"],
        ]);
    });

    test(".spanify() should (maybe) still return useful results when invoked on an incorrect match", () => {
        expect(fuzzy.spanify("asdf fuzzy search", "fzzy src", results)).toEqual(false);
        expectSpans([
            [StringMatchType.ERROR_UNMATCHED_QUERY, "asd"],
            [StringMatchType.MATCHED, "f"],
            [StringMatchType.ERROR_UNMATCHED_QUERY, " fu"],
            [StringMatchType.MATCHED, "zzy s"],
            [StringMatchType.ERROR_UNMATCHED_QUERY, "ea"],
            [StringMatchType.MATCHED, "rc"],
            [StringMatchType.ERROR_UNMATCHED_QUERY, "h"],
        ]);
    });
});