From f6c1319d49f7bc07225f7964aaa8f0a6ad60977c Mon Sep 17 00:00:00 2001 From: Istvan Soos Date: Thu, 7 Nov 2024 12:57:39 +0100 Subject: [PATCH 1/2] Use IndexedScores in package and SDK doc page search. --- app/lib/search/token_index.dart | 58 ++++++--------------------------- 1 file changed, 10 insertions(+), 48 deletions(-) diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart index b0f69ccb48..3982600bca 100644 --- a/app/lib/search/token_index.dart +++ b/app/lib/search/token_index.dart @@ -206,38 +206,6 @@ class TokenIndex { return tokenMatch; } - /// Returns an {id: score} map of the documents stored in the [TokenIndex]. - /// The tokens in [tokenMatch] will be used to calculate a weighted sum of scores. - /// - /// When [limitToIds] is specified, the result will contain only the set of - /// identifiers in it. - Map _scoreDocs(TokenMatch tokenMatch, - {double weight = 1.0, Set? limitToIds}) { - // Summarize the scores for the documents. - final scores = IndexedScore(_ids); - for (final token in tokenMatch.tokens) { - final docWeights = _inverseIds[token]!; - for (final e in docWeights.entries) { - scores.setValueMaxOf(e.key, tokenMatch[token]! * e.value); - } - } - - if (limitToIds != null) { - scores.retainWhere((_, id) => limitToIds.contains(id)); - } - final result = {}; - // post-process match weights - for (var i = 0; i < _length; i++) { - final w = scores._values[i]; - if (w <= 0.0) { - continue; - } - final id = _ids[i]; - result[id] = scores._values[i] * weight; - } - return result; - } - /// Search the index for [text], with a (term-match / document coverage percent) /// scoring. @visibleForTesting @@ -247,25 +215,19 @@ class TokenIndex { /// Search the index for [words], with a (term-match / document coverage percent) /// scoring. - Score searchWords(List words, - {double weight = 1.0, Set? limitToIds}) { - if (limitToIds != null && limitToIds.isEmpty) { - return Score.empty; - } - final scores = []; + Score searchWords(List words, {double weight = 1.0}) { + IndexedScore? score; for (final w in words) { - final tokens = lookupTokens(w); - final values = _scoreDocs( - tokens, - weight: weight, - limitToIds: limitToIds, - ); - if (values.isEmpty) { - return Score.empty; + final s = IndexedScore(_ids); + searchAndAccumulate(w, score: s, weight: weight); + if (score == null) { + score = s; + // NOTE: in the subsequent round(s), weight will be re-applied on the next word(s) too. + } else { + score.multiplyAllFrom(s); } - scores.add(Score(values)); } - return Score.multiply(scores); + return score?.toScore() ?? Score.empty; } /// Searches the index with [word] and stores the results in [score], using From 4f325732bf76d27f60eb66b3c122913cb4f43185 Mon Sep 17 00:00:00 2001 From: Istvan Soos Date: Thu, 7 Nov 2024 12:59:50 +0100 Subject: [PATCH 2/2] Fix weight --- app/lib/search/token_index.dart | 3 ++- app/test/search/api_doc_page_test.dart | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart index 3982600bca..c419dbfd43 100644 --- a/app/lib/search/token_index.dart +++ b/app/lib/search/token_index.dart @@ -216,13 +216,14 @@ class TokenIndex { /// Search the index for [words], with a (term-match / document coverage percent) /// scoring. Score searchWords(List words, {double weight = 1.0}) { + if (words.isEmpty) return Score.empty; IndexedScore? score; + weight = math.pow(weight, 1 / words.length).toDouble(); for (final w in words) { final s = IndexedScore(_ids); searchAndAccumulate(w, score: s, weight: weight); if (score == null) { score = s; - // NOTE: in the subsequent round(s), weight will be re-applied on the next word(s) too. } else { score.multiplyAllFrom(s); } diff --git a/app/test/search/api_doc_page_test.dart b/app/test/search/api_doc_page_test.dart index 40b5ca9606..5631515163 100644 --- a/app/test/search/api_doc_page_test.dart +++ b/app/test/search/api_doc_page_test.dart @@ -99,7 +99,7 @@ void main() { 'packageHits': [ { 'package': 'foo', - 'score': closeTo(0.18, 0.01), // find WebPageGenerator + 'score': closeTo(0.26, 0.01), // find WebPageGenerator 'apiPages': [ {'path': 'generator.html'}, ], @@ -119,7 +119,7 @@ void main() { 'packageHits': [ { 'package': 'foo', - 'score': closeTo(0.11, 0.01), // find WebPageGenerator + 'score': closeTo(0.16, 0.01), // find WebPageGenerator 'apiPages': [ {'path': 'generator.html'}, ],