diff --git a/gopls/internal/lsp/cmd/usage/workspace_symbol.hlp b/gopls/internal/lsp/cmd/usage/workspace_symbol.hlp index a61b47b332e..ed22e989ee3 100644 --- a/gopls/internal/lsp/cmd/usage/workspace_symbol.hlp +++ b/gopls/internal/lsp/cmd/usage/workspace_symbol.hlp @@ -9,5 +9,5 @@ Example: workspace_symbol-flags: -matcher=string - specifies the type of matcher: fuzzy, caseSensitive, or caseInsensitive. - The default is caseInsensitive. + specifies the type of matcher: fuzzy, fastfuzzy, casesensitive, or caseinsensitive. + The default is caseinsensitive. diff --git a/gopls/internal/lsp/cmd/workspace_symbol.go b/gopls/internal/lsp/cmd/workspace_symbol.go index ed28df093da..520d6bce810 100644 --- a/gopls/internal/lsp/cmd/workspace_symbol.go +++ b/gopls/internal/lsp/cmd/workspace_symbol.go @@ -8,6 +8,7 @@ import ( "context" "flag" "fmt" + "strings" "golang.org/x/tools/gopls/internal/lsp/protocol" "golang.org/x/tools/gopls/internal/lsp/source" @@ -16,7 +17,7 @@ import ( // workspaceSymbol implements the workspace_symbol verb for gopls. type workspaceSymbol struct { - Matcher string `flag:"matcher" help:"specifies the type of matcher: fuzzy, caseSensitive, or caseInsensitive.\nThe default is caseInsensitive."` + Matcher string `flag:"matcher" help:"specifies the type of matcher: fuzzy, fastfuzzy, casesensitive, or caseinsensitive.\nThe default is caseinsensitive."` app *Application } @@ -46,10 +47,10 @@ func (r *workspaceSymbol) Run(ctx context.Context, args ...string) error { if opts != nil { opts(o) } - switch r.Matcher { + switch strings.ToLower(r.Matcher) { case "fuzzy": o.SymbolMatcher = source.SymbolFuzzy - case "caseSensitive": + case "casesensitive": o.SymbolMatcher = source.SymbolCaseSensitive case "fastfuzzy": o.SymbolMatcher = source.SymbolFastFuzzy diff --git a/gopls/internal/lsp/source/workspace_symbol.go b/gopls/internal/lsp/source/workspace_symbol.go index 4a65d529ae1..bf92c77e9e9 100644 --- a/gopls/internal/lsp/source/workspace_symbol.go +++ b/gopls/internal/lsp/source/workspace_symbol.go @@ -484,7 +484,10 @@ func matchFile(store *symbolStore, symbolizer symbolizer, matcher matcherFunc, r // every field or method nesting level to access the field decreases // the score by a factor of 1.0 - depth*depthFactor, up to a depth of // 3. - depthFactor = 0.2 + // + // Use a small constant here, as this exists mostly to break ties + // (e.g. given a type Foo and a field x.Foo, prefer Foo). + depthFactor = 0.01 ) startWord := true diff --git a/internal/fuzzy/symbol.go b/internal/fuzzy/symbol.go index 073a4cd1012..bf93041521b 100644 --- a/internal/fuzzy/symbol.go +++ b/internal/fuzzy/symbol.go @@ -26,9 +26,6 @@ import ( // symbol or identifiers, so doing this avoids allocating strings. // - We can return the index of the right-most match, allowing us to trim // irrelevant qualification. -// -// This implementation is experimental, serving as a reference fast algorithm -// to compare to the fuzzy algorithm implemented by Matcher. type SymbolMatcher struct { // Using buffers of length 256 is both a reasonable size for most qualified // symbols, and makes it easy to avoid bounds checks by using uint8 indexes. @@ -169,19 +166,29 @@ input: // Score is the average score for each character. // // A character score is the multiple of: - // 1. 1.0 if the character starts a segment, .8 if the character start a - // mid-segment word, otherwise 0.6. This carries over to immediately - // following characters. - // 2. For the final character match, the multiplier from (1) is reduced to - // .8 if the next character in the input is a mid-segment word, or 0.6 if - // the next character in the input is not a word or segment start. This - // ensures that we favor whole-word or whole-segment matches over prefix - // matches. - // 3. 1.0 if the character is part of the last segment, otherwise - // 1.0-.2*, with a max segment count of 3. + // 1. 1.0 if the character starts a segment or is preceded by a matching + // character, 0.9 if the character starts a mid-segment word, else 0.6. + // + // Note that characters preceded by a matching character get the max + // score of 1.0 so that sequential or exact matches are preferred, even + // if they don't start/end at a segment or word boundary. For example, a + // match for "func" in intfuncs should have a higher score than in + // ifunmatched. + // + // For the final character match, the multiplier from (1) is reduced to + // 0.9 if the next character in the input is a mid-segment word, or 0.6 + // if the next character in the input is not a word or segment start. + // This ensures that we favor whole-word or whole-segment matches over + // prefix matches. + // + // 2. 1.0 if the character is part of the last segment, otherwise + // 1.0-0.1*, with a max segment count of 3. + // Notably 1.0-0.1*3 = 0.7 > 0.6, so that foo/_/_/_/_ (a match very + // early in a qualified symbol name) still scores higher than _f_o_o_ + // (a completely split match). // - // This is a very naive algorithm, but it is fast. There's lots of prior art - // here, and we should leverage it. For example, we could explicitly consider + // This is a naive algorithm, but it is fast. There's lots of prior art here + // that could be leveraged. For example, we could explicitly consider // character distance, and exact matches of words or segments. // // Also note that this might not actually find the highest scoring match, as @@ -192,10 +199,10 @@ input: p = m.pattern[pi] const ( - segStreak = 1.0 - wordStreak = 0.8 + segStreak = 1.0 // start of segment or sequential match + wordStreak = 0.9 // start of word match noStreak = 0.6 - perSegment = 0.2 // we count at most 3 segments above + perSegment = 0.1 // we count at most 3 segments above ) streakBonus := noStreak @@ -228,6 +235,7 @@ input: if finalChar { break } + streakBonus = segStreak // see above: sequential characters get the max score } else { streakBonus = noStreak } diff --git a/internal/fuzzy/symbol_test.go b/internal/fuzzy/symbol_test.go index df74bbe0d37..2a9d9b663bd 100644 --- a/internal/fuzzy/symbol_test.go +++ b/internal/fuzzy/symbol_test.go @@ -40,12 +40,12 @@ func TestSymbolRanking(t *testing.T) { symbols := []string{ "this.is.better.than.most", "test.foo.bar", - "atest", "thebest", "test.foo", "test.foo", - "tTest", + "atest", "testage", + "tTest", "foo.test", "test", } @@ -60,6 +60,33 @@ func TestSymbolRanking(t *testing.T) { } } +// Test that we strongly prefer exact matches. +// +// In golang/go#60027, we preferred "Runner" for the query "rune" over several +// results containing the word "rune" exactly. Following this observation, +// scoring was tweaked to more strongly emphasize sequential characters and +// exact matches. +func TestSymbolRanking_Issue60027(t *testing.T) { + matcher := NewSymbolMatcher("rune") + + // symbols to match, in ascending order of ranking. + symbols := []string{ + "Runner", + "singleRuneParam", + "Config.ifsRune", + "Parser.rune", + } + prev := 0.0 + for _, sym := range symbols { + _, score := matcher.Match([]string{sym}) + t.Logf("Match(%q) = %v", sym, score) + if score < prev { + t.Errorf("Match(%q) = _, %v, want > %v", sym, score, prev) + } + prev = score + } +} + func TestChunkedMatch(t *testing.T) { matcher := NewSymbolMatcher("test")