/** Classifies a single span produced by `IStringMatcher.spanify()`. */
export enum StringMatchType {
  /** Part of the value string that was matched by the query string. */
  MATCHED,

  /** Part of the value string that was skipped (not matched by the query). */
  UNMATCHED,

  /** Part of the query string that matched nothing; its presence means the match failed. */
  ERROR_UNMATCHED_QUERY,
}

/** A single span: how it was classified, and the text it covers. */
export type SpanifyResult = [StringMatchType, String];

/** The spans written by one `spanify()` call. */
export type SpanifyResults = Array<SpanifyResult>;

/**
 * Generic interface for any string matching algorithm.
 *
 * NOTE(review): parameters are typed with the boxed `String` so that existing callers holding
 * `String`-typed values keep compiling; primitive `string` arguments are accepted as well.
 */
export interface IStringMatcher {
  /**
   * Matches a value string against a query string.
   *
   * @param query The query string (eg. "fzsrch")
   * @param value The value we're checking (eg. "fuzzy search")
   * @returns true iff `value` matches `query` according to the algorithm being implemented.
   */
  match(query: String, value: String): boolean;

  /**
   * Spanifies (ie. breaks into sub-strings / matched tokens) a string to visualize how it is
   * being matched by `match()`.
   *
   * @param query The query string (eg. "fzsrch")
   * @param value The value we're checking (eg. "fuzzy search")
   * @param results An external, cached array that receives the spans. Its length is set to
   *   zero and the spans are then appended; no new array is allocated, to minimize gc.
   * @returns true iff the whole query was matched, false otherwise
   */
  spanify(query: String, value: String, results: SpanifyResults): boolean;

  /**
   * Convenience factory, since spelling out the tuple-array type at call sites is annoying.
   *
   * @returns a new, empty `SpanifyResults` (ie. `Array<[StringMatchType, String]>`).
   */
  newSpanifyResults(): SpanifyResults;
}

/** Assigns a sort priority to a string. */
export interface IStringPrioritizer {
  /**
   * @param value The string to rank.
   * @returns the priority as a primitive number, so that it can be used in arithmetic/comparators.
   */
  priority(value: String): number;
}

/** Sorts string data using the priorities supplied by an `IStringPrioritizer`. */
export interface IStringSorter {
  /**
   * NOTE(review): `values` is declared as a single `String`; a collection was presumably
   * intended — confirm before implementing this interface.
   */
  sort(values: String, prioritizer: IStringPrioritizer): void;
}

/** Bundles a sorter with an optional matcher. It only stores its collaborators so far. */
export class StringSearchFilterer {
  /** Span buffer; nothing assigns it yet, so it stays `null`. */
  private spanResults: SpanifyResults | null = null;

  /**
   * @param sorter Sorter to store.
   * @param filterer Matcher to store, or `null` for none.
   */
  constructor(
    private readonly sorter: IStringSorter,
    private readonly filterer: IStringMatcher | null,
  ) {}
}

/**
 * Implements a fuzzy string search that succeeds when the characters of `query` appear in
 * `value` in the same order (not necessarily adjacent).
 *
 * @example
 * ```typescript
 * let fuzzy = new FuzzyMatcher();
 * assert(fuzzy.match("fzy", "fuzzy"));
 * assert(!fuzzy.match("fyz", "fuzzy"));
 * ```
 * @example
 * ```typescript
 * let results = fuzzy.newSpanifyResults();
 * fuzzy.spanify("fzy", "fuzzy", results);
 * console.log('Matching "fuzzy" against "fzy"');
 * results.forEach(function (result) {
 *   let match = result[0], substring = result[1];
 *   console.log('' + match + ': "' + substring + '"');
 * });
 * ```
 */
export class FuzzyMatcher implements IStringMatcher {
  /**
   * @param query Characters that must all appear, in order, inside `value`.
   * @param value The string being tested.
   * @returns true iff every character of `query` was found, in order, in `value`.
   */
  match(query: String, value: String): boolean {
    let valueLeft = value.length;
    let queryLeft = query.length;
    // Walk both strings from the end. Stop as soon as fewer value characters remain than
    // query characters still to be placed: a match is impossible from there.
    while (queryLeft !== 0 && valueLeft >= queryLeft) {
      if (value[valueLeft - 1] === query[queryLeft - 1]) {
        --queryLeft;
      }
      --valueLeft;
    }
    return queryLeft === 0;
  }

  /**
   * Splits `value` into alternating MATCHED / UNMATCHED spans. Scans right to left like
   * `match()`, so each query character is paired with its right-most feasible occurrence, and
   * the returned boolean always equals `match(query, value)`.
   *
   * When the query cannot be fully matched, the leading part of the query that was left over
   * is reported as a single ERROR_UNMATCHED_QUERY span placed first in `results`.
   *
   * @param query The query string.
   * @param value The value string to break into spans.
   * @param results Caller-owned buffer; cleared, then filled with spans in left-to-right order.
   *   Concatenating the MATCHED and UNMATCHED spans reproduces `value`. Empty spans are never emitted.
   * @returns true iff the whole query was matched.
   */
  spanify(query: String, value: String, results: SpanifyResults): boolean {
    results.length = 0;

    let i = value.length;
    let j = query.length;
    while (i > 0) {
      const spanEnd = i;
      if (j > 0 && value[i - 1] === query[j - 1]) {
        // Consume the longest run in which value and query agree.
        while (i > 0 && j > 0 && value[i - 1] === query[j - 1]) {
          --i;
          --j;
        }
        results.push([StringMatchType.MATCHED, value.substring(i, spanEnd)]);
      } else {
        // Skip value characters until one matches the next query character; once the query
        // is used up (j === 0) this consumes the rest of the value.
        while (i > 0 && !(j > 0 && value[i - 1] === query[j - 1])) {
          --i;
        }
        results.push([StringMatchType.UNMATCHED, value.substring(i, spanEnd)]);
      }
    }
    if (j > 0) {
      results.push([StringMatchType.ERROR_UNMATCHED_QUERY, query.substring(0, j)]);
    }
    // Spans were collected right to left; flip them in place (no extra allocation).
    results.reverse();
    return j === 0;
  }

  /** @returns a new, empty span buffer suitable for `spanify()`. */
  newSpanifyResults(): SpanifyResults {
    return [];
  }
}

/** Matches when `query` occurs in `value` as one contiguous substring. */
export class SubstringMatcher implements IStringMatcher {
  /**
   * @param query The substring to look for.
   * @param value The string being searched.
   * @returns true iff `query` occurs contiguously inside `value` (an empty query always matches).
   */
  match(query: String, value: String): boolean {
    return value.indexOf(query.valueOf()) !== -1;
  }

  /**
   * Emits up to three spans for the first occurrence of `query`: the unmatched prefix, the
   * matched substring and the unmatched suffix. When `query` does not occur, emits the whole
   * query as ERROR_UNMATCHED_QUERY followed by the whole value as UNMATCHED.
   *
   * @param query The substring to look for.
   * @param value The value string to break into spans.
   * @param results Caller-owned buffer; cleared, then filled. Empty spans are never emitted.
   * @returns true iff `query` was found in `value`.
   */
  spanify(query: String, value: String, results: SpanifyResults): boolean {
    results.length = 0;

    const start = value.indexOf(query.valueOf());
    if (start === -1) {
      // indexOf("") is always 0, so the query is non-empty on this path.
      results.push([StringMatchType.ERROR_UNMATCHED_QUERY, query]);
      if (value.length > 0) {
        results.push([StringMatchType.UNMATCHED, value]);
      }
      return false;
    }

    const end = start + query.length;
    if (start > 0) {
      results.push([StringMatchType.UNMATCHED, value.substring(0, start)]);
    }
    if (end > start) {
      results.push([StringMatchType.MATCHED, value.substring(start, end)]);
    }
    if (end < value.length) {
      results.push([StringMatchType.UNMATCHED, value.substring(end)]);
    }
    return true;
  }

  /** @returns a new, empty span buffer suitable for `spanify()`. */
  newSpanifyResults(): SpanifyResults {
    return [];
  }
}
/**
 * Specification for FuzzyMatcher: `match()` decides whether the query's characters appear in
 * order inside the value, and `spanify()` reports how the value was split into spans.
 */
describe("FuzzyMatcher tests", () => {
  const fuzzy = new FuzzyMatcher();

  test("It won't bother null checks, since typescript should be capable of handling that for us", () => {
    // Do nothing
  });

  test(".match() should handle input edgecases", () => {
    expect(fuzzy.match("", "")).toEqual(true);
    expect(fuzzy.match("asdf", "")).toEqual(false);
    expect(fuzzy.match("", "hello!")).toEqual(true);
  });

  test(".match() should handle input as expected", () => {
    expect(fuzzy.match("a", "a")).toEqual(true);
    expect(fuzzy.match("a", "ab")).toEqual(true);
    expect(fuzzy.match("b", "ab")).toEqual(true);
    expect(fuzzy.match("ab", "a")).toEqual(false);
    expect(fuzzy.match("ab", "b")).toEqual(false);
    expect(fuzzy.match("fzy src", "fuzzy search")).toEqual(true);
    expect(fuzzy.match("fuzzy search", "fzy src")).toEqual(false);
  });

  // One buffer shared by every spanify() test below; each spanify() call is expected to
  // clear it before writing, as the IStringMatcher contract describes.
  const results = fuzzy.newSpanifyResults();

  test(".spanify() should behave the same as .match()", () => {
    expect(fuzzy.spanify("", "", results)).toEqual(true);
    expect(fuzzy.spanify("asdf", "", results)).toEqual(false);
    expect(fuzzy.spanify("", "hello!", results)).toEqual(true);
    expect(fuzzy.spanify("a", "a", results)).toEqual(true);
    expect(fuzzy.spanify("a", "ab", results)).toEqual(true);
    expect(fuzzy.spanify("b", "ab", results)).toEqual(true);
    expect(fuzzy.spanify("ab", "a", results)).toEqual(false);
    expect(fuzzy.spanify("ab", "b", results)).toEqual(false);
    expect(fuzzy.spanify("fzy src", "fuzzy search", results)).toEqual(true);
    expect(fuzzy.spanify("fzzy src", "asdf fuzzy search", results)).toEqual(true);
    expect(fuzzy.spanify("fuzzy search", "fzy src", results)).toEqual(false);
  });

  test(".spanify() should return with no results when given empty arguments", () => {
    expect(fuzzy.spanify("", "", results)).toEqual(true);
    expect(results.length).toEqual(0);
  });

  test(".spanify() should return with one matching result when arguments match exactly", () => {
    expect(fuzzy.spanify("foo", "foo", results)).toEqual(true);
    expect(results.length).toEqual(1);
    expect(results[0][0]).toEqual(StringMatchType.MATCHED);
    expect(results[0][1]).toEqual("foo");
  });

  test(".spanify() should return with one unmatched / skipped result when query string is empty", () => {
    expect(fuzzy.spanify("", "foo", results)).toEqual(true);
    expect(results.length).toEqual(1);
    expect(results[0][0]).toEqual(StringMatchType.UNMATCHED);
    expect(results[0][1]).toEqual("foo");
  });

  test(".spanify() should return with one unmatched querry result (error) when value string is empty", () => {
    expect(fuzzy.spanify("foo", "", results)).toEqual(false);
    expect(results.length).toEqual(1);
    expect(results[0][0]).toEqual(StringMatchType.ERROR_UNMATCHED_QUERY);
    expect(results[0][1]).toEqual("foo");
  });

  test(".spanify() should return with the correct results when invoked on a correct match", () => {
    // Pins the alignment for a successful match: the LAST feasible occurrence of each query
    // character is the one reported as matched ("asdf " is skipped, the second "f" matches).
    expect(fuzzy.spanify("fzzy src", "asdf fuzzy search", results)).toEqual(true);
    expect(results.length).toEqual(7);
    expect(results[0][0]).toEqual(StringMatchType.UNMATCHED);
    expect(results[0][1]).toEqual("asdf ");
    expect(results[1][0]).toEqual(StringMatchType.MATCHED);
    expect(results[1][1]).toEqual("f");
    expect(results[2][0]).toEqual(StringMatchType.UNMATCHED);
    expect(results[2][1]).toEqual("u");
    expect(results[3][0]).toEqual(StringMatchType.MATCHED);
    expect(results[3][1]).toEqual("zzy s");
    expect(results[4][0]).toEqual(StringMatchType.UNMATCHED);
    expect(results[4][1]).toEqual("ea");
    expect(results[5][0]).toEqual(StringMatchType.MATCHED);
    expect(results[5][1]).toEqual("rc");
    expect(results[6][0]).toEqual(StringMatchType.UNMATCHED);
    expect(results[6][1]).toEqual("h");
  });

  test(".spanify() should (maybe) still return useful results when invoked on an incorrect match", () => {
    // The exact span layout of a failed match is deliberately NOT pinned: the previous
    // expectations required the FIRST "f" of the longer string to be the matched one, which
    // is the opposite alignment from the successful-match test above. Only properties that
    // hold for any alignment are checked here.
    expect(fuzzy.spanify("asdf fuzzy search", "fzzy src", results)).toEqual(false);

    // The failure must be visible in the spans themselves.
    const hasQueryError = results.some((span) => span[0] === StringMatchType.ERROR_UNMATCHED_QUERY);
    expect(hasQueryError).toEqual(true);

    // Every non-error span is a piece of the value, so together they must rebuild it.
    const rebuiltValue = results
      .filter((span) => span[0] !== StringMatchType.ERROR_UNMATCHED_QUERY)
      .map((span) => span[1])
      .join("");
    expect(rebuiltValue).toEqual("fzzy src");

    // Empty spans carry no information and should never be emitted.
    results.forEach((span) => expect(span[1].length).toBeGreaterThan(0));
  });
});