From 60c0155a60aa0ae29c3e4c79e5f7a73990d6d37e Mon Sep 17 00:00:00 2001
From: Istvan Soos <istvan.soos@gmail.com>
Date: Thu, 7 Nov 2024 10:14:36 +0100
Subject: [PATCH 1/2] Use IndexedScore to accumulate name, description and
 readme scores.

---
 app/lib/search/mem_index.dart   | 80 +++++++++++++++------------------
 app/lib/search/token_index.dart | 42 +++++++++++++++++
 2 files changed, 77 insertions(+), 45 deletions(-)
diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart
index 2cd6811545..a0fb1ed001 100644
--- a/app/lib/search/mem_index.dart
+++ b/app/lib/search/mem_index.dart
@@ -304,32 +304,28 @@ class InMemoryPackageIndex {
       // We cannot update the main `packages` variable yet, as the dartdoc API
       // symbols are added on top of the core results, and `packages` is used
       // there too.
-      final coreScores = <Score>[];
-      var wordScopedPackages = packages;
+      final coreScores = IndexedScore(_packageNameIndex._packageNames);
+      for (var i = 0; i < _documents.length; i++) {
+        if (packages.contains(_documents[i].package)) {
+          coreScores.setValue(i, 1.0);
+        }
+      }
       for (final word in words) {
-        final nameScore = _packageNameIndex.searchWord(word,
-            filterOnPackages: wordScopedPackages);
         if (includeNameMatches && _documentsByName.containsKey(word)) {
           nameMatches ??= <String>{};
           nameMatches.add(word);
         }
 
-        final descr = _descrIndex
-            .searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
-        final readme = _readmeIndex
-            .searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
-        final score = Score.max([nameScore, descr, readme]);
-        coreScores.add(score);
-        // don't update if the query is single-word
-        if (words.length > 1) {
-          wordScopedPackages = score.keys.toSet();
-          if (wordScopedPackages.isEmpty) {
-            break;
-          }
-        }
+        // The word's score is the best of its name, description and readme
+        // matches; the per-word scores are combined by multiplication.
+        final wordScore =
+            _packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
+        _descrIndex.searchAndAccumulate(word, weight: 0.90, score: wordScore);
+        _readmeIndex.searchAndAccumulate(word, weight: 0.75, score: wordScore);
+        coreScores.multiplyAllFrom(wordScore);
       }
 
-      final core = Score.multiply(coreScores);
+      final core = coreScores.toScore();
 
       var symbolPages = Score.empty;
       if (!checkAborted()) {
@@ -495,16 +491,13 @@ class _TextResults {
 @visibleForTesting
 class PackageNameIndex {
   final List<String> _packageNames;
-  late final Map<String, _PkgNameData> _data;
+  late final List<_PkgNameData> _data;
 
   PackageNameIndex(this._packageNames) {
-    _data = Map.fromEntries(_packageNames.map((package) {
+    _data = _packageNames.map((package) {
       final collapsed = _collapseName(package);
-      return MapEntry(
-        package,
-        _PkgNameData(collapsed, trigrams(collapsed).toSet()),
-      );
-    }));
+      return _PkgNameData(collapsed, trigrams(collapsed).toSet());
+    }).toList();
   }
 
   /// Maps package name to a reduced form of the name:
@@ -515,45 +508,42 @@ class PackageNameIndex {
   /// Search [text] and return the matching packages with scores.
   @visibleForTesting
   Score search(String text) {
-    Score? score;
+    IndexedScore? score;
     for (final w in splitForQuery(text)) {
-      final s = searchWord(w, filterOnPackages: score?.keys);
+      final s = searchWord(w, filterOnNonZeros: score);
       if (score == null) {
         score = s;
       } else {
-        // Note: on one hand, it is inefficient to multiply the [Score] on each
-        // iteration. However, (1) this is only happening in test, (2) it may be
-        // better for the next iteration to work on a more limited `filterOnPackages`,
-        // and (3) it will be updated to a more efficient in-place update (#8225).
-        score = Score.multiply([score, s]);
+        score.multiplyAllFrom(s);
       }
     }
-    return score ?? Score.empty;
+    return score?.toScore() ?? Score.empty;
   }
 
   /// Search using the parsed [word] and return the matching packages with scores
-  /// as a new [Score] instance.
+  /// as a new [IndexedScore] instance.
   ///
-  /// When [filterOnPackages] is present, only the names present are evaluated.
-  Score searchWord(
+  /// When [filterOnNonZeros] is present, only the indexes where it already
+  /// holds a positive value are evaluated.
+  IndexedScore searchWord(
     String word, {
-    Iterable<String>? filterOnPackages,
+    IndexedScore? filterOnNonZeros,
   }) {
-    final pkgNamesToCheck = filterOnPackages ?? _packageNames;
-    final values = <String, double>{};
+    final score = IndexedScore(_packageNames);
     final singularWord = word.length <= 3 || !word.endsWith('s')
         ? word
         : word.substring(0, word.length - 1);
     final collapsedWord = _collapseName(singularWord);
     final parts =
         collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
-    for (final pkg in pkgNamesToCheck) {
-      final entry = _data[pkg];
-      if (entry == null) {
+    for (var i = 0; i < _data.length; i++) {
+      if (filterOnNonZeros?.isNotPositive(i) ?? false) {
         continue;
       }
+
+      final entry = _data[i];
       if (entry.collapsed.contains(collapsedWord)) {
-        values[pkg] = 1.0;
+        score.setValue(i, 1.0);
         continue;
       }
       var matched = 0;
@@ -567,11 +557,11 @@ class PackageNameIndex {
       if (matched > 0) {
         final v = matched / parts.length;
         if (v >= 0.5) {
-          values[pkg] = v;
+          score.setValue(i, v);
         }
       }
     }
-    return Score(values);
+    return score;
   }
 }
 
diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart
index 95d42a46ba..b0f69ccb48 100644
--- a/app/lib/search/token_index.dart
+++ b/app/lib/search/token_index.dart
@@ -267,6 +267,24 @@ class TokenIndex {
     }
     return Score.multiply(scores);
   }
+
+  /// Searches the index with [word] and merges the weighted results into
+  /// [score], keeping the larger of the existing and the new value per index.
+  void searchAndAccumulate(
+    String word, {
+    double weight = 1.0,
+    required IndexedScore score,
+  }) {
+    assert(score.length == _length);
+    final tokenMatch = lookupTokens(word);
+    for (final token in tokenMatch.tokens) {
+      final matchWeight = tokenMatch[token]!;
+      final tokenWeight = _inverseIds[token]!;
+      for (final e in tokenWeight.entries) {
+        score.setValueMaxOf(e.key, matchWeight * e.value * weight);
+      }
+    }
+  }
 }
 
 /// Mutable score list that can accessed via integer index.
@@ -285,6 +303,10 @@ class IndexedScore {
     return _values[index] <= 0.0;
   }
 
+  /// Overwrites the value at [index] with [value], replacing whatever was
+  /// stored there before.
+  void setValue(int index, double value) => _values[index] = value;
+
   void setValueMaxOf(int index, double value) {
     _values[index] = math.max(_values[index], value);
   }
@@ -307,6 +329,15 @@ class IndexedScore {
     }
   }
 
+  void multiplyAllFrom(IndexedScore other) {
+    assert(other._values.length == _values.length);
+    for (var i = 0; i < _values.length; i++) {
+      if (_values[i] == 0.0) continue; // already zero, stays zero
+      final v = other._values[i];
+      _values[i] = v == 0.0 ? 0.0 : _values[i] * v; // zero factor gives zero
+    }
+  }
+
   Set<String> toKeySet() {
     final set = <String>{};
     for (var i = 0; i < _values.length; i++) {
@@ -317,4 +348,15 @@ class IndexedScore {
     }
     return set;
   }
+
+  /// Converts the accumulated values into a [Score].
+  ///
+  /// Only the indexes with a positive value are included, each mapped from
+  /// its key to its value.
+  Score toScore() {
+    return Score._(<String, double>{
+      for (var i = 0; i < _values.length; i++)
+        if (_values[i] > 0.0) _keys[i]: _values[i],
+    });
+  }
 }

From f1ab69e37a5f396115dcb702ceccc82db0bc6d88 Mon Sep 17 00:00:00 2001
From: Istvan Soos <istvan.soos@gmail.com>
Date: Thu, 7 Nov 2024 10:18:19 +0100
Subject: [PATCH 2/2] Changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d30a01b5ca..7df7830a8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
 AppEngine version, listed here to ease deployment and troubleshooting.
 
 ## Next Release (replace with git tag when deployed)
+ * `search` uses the `IndexedScore` to reduce memory allocations.
 
 ## `20241031t095600-all`
  * Bumped runtimeVersion to `2024.10.29`.