From 361835bbac2935e406c039fd76f73b668c656f09 Mon Sep 17 00:00:00 2001
From: Istvan Soos <istvan.soos@gmail.com>
Date: Mon, 4 Nov 2024 17:16:42 +0100
Subject: [PATCH] Reduce the memory allocation during search with mutable
 IndexedScore.

---
 CHANGELOG.md                           |   1 +
 app/lib/search/mem_index.dart          |  83 ++++++++----------
 app/lib/search/token_index.dart        | 113 +++++++++++++++----------
 app/test/search/api_doc_page_test.dart |   4 +-
 4 files changed, 109 insertions(+), 92 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d30a01b5ca..7df7830a8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
 AppEngine version, listed here to ease deployment and troubleshooting.
 
 ## Next Release (replace with git tag when deployed)
+ * `search` uses the `IndexedScore` to reduce memory allocations.
 
 ## `20241031t095600-all`
  * Bumped runtimeVersion to `2024.10.29`.
diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart
index 0aabd9af30..b0d3f4b0c6 100644
--- a/app/lib/search/mem_index.dart
+++ b/app/lib/search/mem_index.dart
@@ -311,32 +311,28 @@ class InMemoryPackageIndex {
       // We cannot update the main `packages` variable yet, as the dartdoc API
       // symbols are added on top of the core results, and `packages` is used
       // there too.
-      final coreScores = <Score>[];
-      var wordScopedPackages = packages;
+      final coreScores = IndexedScore(_packageNameIndex._packageNames);
+      for (var i = 0; i < _documents.length; i++) {
+        if (packages.contains(_documents[i].package)) {
+          coreScores.setValue(i, 1.0);
+        }
+      }
+
       for (final word in words) {
-        final nameScore = _packageNameIndex.searchWord(word,
-            filterOnPackages: wordScopedPackages);
         if (includeNameMatches && _documentsByName.containsKey(word)) {
           nameMatches ??= <String>{};
           nameMatches.add(word);
         }
 
-        final descr = _descrIndex
-            .searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
-        final readme = _readmeIndex
-            .searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
-        final score = Score.max([nameScore, descr, readme]);
-        coreScores.add(score);
-        // don't update if the query is single-word
-        if (words.length > 1) {
-          wordScopedPackages = score.keys.toSet();
-          if (wordScopedPackages.isEmpty) {
-            break;
-          }
-        }
+        final wordScore =
+            _packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
+        _descrIndex.searchAndAccumulate(word,
+            weight: 0.90.toDouble(), score: wordScore);
+        _readmeIndex.searchAndAccumulate(word,
+            weight: 0.75.toDouble(), score: wordScore);
+        coreScores.multiplyAllFrom(wordScore);
       }
-
-      final core = Score.multiply(coreScores);
+      final core = coreScores.toScore();
 
       var symbolPages = Score.empty;
       if (!checkAborted()) {
@@ -502,16 +498,13 @@ class _TextResults {
 @visibleForTesting
 class PackageNameIndex {
   final List<String> _packageNames;
-  late final Map<String, _PkgNameData> _data;
+  late final List<_PkgNameData> _data;
 
   PackageNameIndex(this._packageNames) {
-    _data = Map.fromEntries(_packageNames.map((package) {
+    _data = _packageNames.map((package) {
       final collapsed = _collapseName(package);
-      return MapEntry(
-        package,
-        _PkgNameData(collapsed, trigrams(collapsed).toSet()),
-      );
-    }));
+      return _PkgNameData(collapsed, trigrams(collapsed).toSet());
+    }).toList();
   }
 
   /// Maps package name to a reduced form of the name:
@@ -522,45 +515,43 @@ class PackageNameIndex {
   /// Search [text] and return the matching packages with scores.
   @visibleForTesting
   Score search(String text) {
-    Score? score;
+    IndexedScore? score;
     for (final w in splitForQuery(text)) {
-      final s = searchWord(w, filterOnPackages: score?.keys);
+      final s = searchWord(w, filterOnNonZeros: score);
       if (score == null) {
         score = s;
       } else {
-        // Note: on one hand, it is inefficient to multiply the [Score] on each
-        // iteration. However, (1) this is only happening in test, (2) it may be
-        // better for the next iteration to work on a more limited `filterOnPackages`,
-        // and (3) it will be updated to a more efficient in-place update (#8225).
-        score = Score.multiply([score, s]);
+        score.multiplyAllFrom(s);
       }
     }
-    return score ?? Score.empty;
+    return score?.toScore() ?? Score.empty;
+
   }
 
   /// Search using the parsed [word] and return the matching packages with scores
-  /// as a new [Score] instance.
+  /// as a new [IndexedScore] instance.
   ///
-  /// When [filterOnPackages] is present, only the names present are evaluated.
-  Score searchWord(
+  /// When [filterOnNonZeros] is present, only the indexes with an already
+  /// non-zero value are evaluated.
+  IndexedScore searchWord(
     String word, {
-    Iterable<String>? filterOnPackages,
+    IndexedScore? filterOnNonZeros,
   }) {
-    final pkgNamesToCheck = filterOnPackages ?? _packageNames;
-    final values = <String, double>{};
+    final score = IndexedScore(_packageNames);
     final singularWord = word.length <= 3 || !word.endsWith('s')
         ? word
         : word.substring(0, word.length - 1);
     final collapsedWord = _collapseName(singularWord);
     final parts =
         collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
-    for (final pkg in pkgNamesToCheck) {
-      final entry = _data[pkg];
-      if (entry == null) {
+    for (var i = 0; i < _data.length; i++) {
+      if (filterOnNonZeros?.isNotPositive(i) ?? false) {
         continue;
       }
+
+      final entry = _data[i];
       if (entry.collapsed.contains(collapsedWord)) {
-        values[pkg] = 1.0;
+        score.setValue(i, 1.0);
         continue;
       }
       var matched = 0;
@@ -574,11 +565,11 @@ class PackageNameIndex {
       if (matched > 0) {
         final v = matched / parts.length;
         if (v >= 0.5) {
-          values[pkg] = v;
+          score.setValue(i, v);
         }
       }
     }
-    return Score(values);
+    return score;
   }
 }
 
diff --git a/app/lib/search/token_index.dart b/app/lib/search/token_index.dart
index fda0f91cf7..4af4d43bb6 100644
--- a/app/lib/search/token_index.dart
+++ b/app/lib/search/token_index.dart
@@ -206,37 +206,22 @@ class TokenIndex {
     return tokenMatch;
   }
 
-  /// Returns an {id: score} map of the documents stored in the [TokenIndex].
-  /// The tokens in [tokenMatch] will be used to calculate a weighted sum of scores.
-  ///
-  /// When [limitToIds] is specified, the result will contain only the set of
-  /// identifiers in it.
-  Map<String, double> _scoreDocs(TokenMatch tokenMatch,
-      {double weight = 1.0, Set<String>? limitToIds}) {
-    // Summarize the scores for the documents.
-    final docScores = List<double>.filled(_length, 0.0);
+  /// Searches the index with [word] and stores the results in [score], keeping
+  /// the maximum of each weighted match and the already existing value.
+  void searchAndAccumulate(
+    String word, {
+    double weight = 1.0,
+    required IndexedScore score,
+  }) {
+    assert(score.length == _length);
+    final tokenMatch = lookupTokens(word);
     for (final token in tokenMatch.tokens) {
-      final docWeights = _inverseIds[token]!;
-      for (final e in docWeights.entries) {
-        final i = e.key;
-        docScores[i] = math.max(docScores[i], tokenMatch[token]! * e.value);
+      final matchWeight = tokenMatch[token]!;
+      final tokenWeight = _inverseIds[token]!;
+      for (final e in tokenWeight.entries) {
+        score.setValueMaxOf(e.key, matchWeight * e.value * weight);
       }
     }
-
-    final result = <String, double>{};
-    // post-process match weights
-    for (var i = 0; i < _length; i++) {
-      final id = _ids[i];
-      final w = docScores[i];
-      if (w <= 0.0) {
-        continue;
-      }
-      if (limitToIds != null && !limitToIds.contains(id)) {
-        continue;
-      }
-      result[id] = w * weight;
-    }
-    return result;
   }
 
   /// Search the index for [text], with a (term-match / document coverage percent)
@@ -248,24 +233,64 @@ class TokenIndex {
 
   /// Search the index for [words], with a (term-match / document coverage percent)
   /// scoring.
-  Score searchWords(List<String> words,
-      {double weight = 1.0, Set<String>? limitToIds}) {
-    if (limitToIds != null && limitToIds.isEmpty) {
-      return Score.empty;
-    }
-    final scores = <Score>[];
+  Score searchWords(List<String> words, {double weight = 1.0}) {
+    IndexedScore? score;
     for (final w in words) {
-      final tokens = lookupTokens(w);
-      final values = _scoreDocs(
-        tokens,
-        weight: weight,
-        limitToIds: limitToIds,
-      );
-      if (values.isEmpty) {
-        return Score.empty;
+      final s = IndexedScore(_ids);
+      searchAndAccumulate(w, score: s, weight: weight);
+      if (score == null) {
+        score = s;
+        // apply `weight` only once; later word scores are multiplied in as-is
+        weight = 1.0;
+      } else {
+        score.multiplyAllFrom(s);
+      }
+    }
+    return score?.toScore() ?? Score.empty;
+  }
+}
+
+/// Mutable score list that can be accessed via integer index.
+class IndexedScore {
+  final List<String> _keys;
+  final List<double> _values;
+
+  IndexedScore._(this._keys, this._values);
+
+  factory IndexedScore(List<String> keys) =>
+      IndexedScore._(keys, List<double>.filled(keys.length, 0.0));
+
+  late final length = _values.length;
+
+  bool isNotPositive(int index) {
+    return _values[index] <= 0.0;
+  }
+
+  void setValue(int index, double value) {
+    _values[index] = value;
+  }
+
+  void setValueMaxOf(int index, double value) {
+    _values[index] = math.max(_values[index], value);
+  }
+
+  void multiplyAllFrom(IndexedScore other) {
+    assert(other._values.length == _values.length);
+    for (var i = 0; i < _values.length; i++) {
+      if (_values[i] == 0.0) continue;
+      final v = other._values[i];
+      _values[i] = v == 0.0 ? 0.0 : _values[i] * v;
+    }
+  }
+
+  Score toScore() {
+    final map = <String, double>{};
+    for (var i = 0; i < _values.length; i++) {
+      final v = _values[i];
+      if (v > 0.0) {
+        map[_keys[i]] = v;
       }
-      scores.add(Score(values));
     }
-    return Score.multiply(scores);
+    return Score._(map);
   }
 }
diff --git a/app/test/search/api_doc_page_test.dart b/app/test/search/api_doc_page_test.dart
index 40b5ca9606..b5d565f201 100644
--- a/app/test/search/api_doc_page_test.dart
+++ b/app/test/search/api_doc_page_test.dart
@@ -99,7 +99,7 @@ void main() {
         'packageHits': [
           {
             'package': 'foo',
-            'score': closeTo(0.18, 0.01), // find WebPageGenerator
+            'score': closeTo(0.26, 0.01), // find WebPageGenerator
             'apiPages': [
               {'path': 'generator.html'},
             ],
@@ -119,7 +119,7 @@ void main() {
         'packageHits': [
           {
             'package': 'foo',
-            'score': closeTo(0.11, 0.01), // find WebPageGenerator
+            'score': closeTo(0.15, 0.01), // find WebPageGenerator
             'apiPages': [
               {'path': 'generator.html'},
             ],