Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Important changes to data models, configuration, and migrations between each
AppEngine version, listed here to ease deployment and troubleshooting.

## Next Release (replace with git tag when deployed)
* `search` uses the `IndexedScore` to reduce memory allocations.

## `20241031t095600-all`
* Bumped runtimeVersion to `2024.10.29`.
Expand Down
80 changes: 35 additions & 45 deletions app/lib/search/mem_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -304,32 +304,28 @@ class InMemoryPackageIndex {
// We cannot update the main `packages` variable yet, as the dartdoc API
// symbols are added on top of the core results, and `packages` is used
// there too.
final coreScores = <Score>[];
var wordScopedPackages = packages;
final coreScores = IndexedScore(_packageNameIndex._packageNames);
for (var i = 0; i < _documents.length; i++) {
if (packages.contains(_documents[i].package)) {
coreScores.setValue(i, 1.0);
}
}
for (final word in words) {
final nameScore = _packageNameIndex.searchWord(word,
filterOnPackages: wordScopedPackages);
if (includeNameMatches && _documentsByName.containsKey(word)) {
nameMatches ??= <String>{};
nameMatches.add(word);
}

final descr = _descrIndex
.searchWords([word], weight: 0.90, limitToIds: wordScopedPackages);
final readme = _readmeIndex
.searchWords([word], weight: 0.75, limitToIds: wordScopedPackages);
final score = Score.max([nameScore, descr, readme]);
coreScores.add(score);
// don't update if the query is single-word
if (words.length > 1) {
wordScopedPackages = score.keys.toSet();
if (wordScopedPackages.isEmpty) {
break;
}
}
final wordScore =
_packageNameIndex.searchWord(word, filterOnNonZeros: coreScores);
_descrIndex.searchAndAccumulate(word,
weight: 0.90.toDouble(), score: wordScore);
_readmeIndex.searchAndAccumulate(word,
weight: 0.75.toDouble(), score: wordScore);
coreScores.multiplyAllFrom(wordScore);
}

final core = Score.multiply(coreScores);
final core = coreScores.toScore();

var symbolPages = Score.empty;
if (!checkAborted()) {
Expand Down Expand Up @@ -495,16 +491,13 @@ class _TextResults {
@visibleForTesting
class PackageNameIndex {
final List<String> _packageNames;
late final Map<String, _PkgNameData> _data;
late final List<_PkgNameData> _data;

PackageNameIndex(this._packageNames) {
_data = Map.fromEntries(_packageNames.map((package) {
_data = _packageNames.map((package) {
final collapsed = _collapseName(package);
return MapEntry(
package,
_PkgNameData(collapsed, trigrams(collapsed).toSet()),
);
}));
return _PkgNameData(collapsed, trigrams(collapsed).toSet());
}).toList();
}

/// Maps package name to a reduced form of the name:
Expand All @@ -515,45 +508,42 @@ class PackageNameIndex {
/// Search [text] and return the matching packages with scores.
@visibleForTesting
Score search(String text) {
Score? score;
IndexedScore? score;
for (final w in splitForQuery(text)) {
final s = searchWord(w, filterOnPackages: score?.keys);
final s = searchWord(w, filterOnNonZeros: score);
if (score == null) {
score = s;
} else {
// Note: on one hand, it is inefficient to multiply the [Score] on each
// iteration. However, (1) this is only happening in test, (2) it may be
// better for the next iteration to work on a more limited `filterOnPackages`,
// and (3) it will be updated to a more efficient in-place update (#8225).
score = Score.multiply([score, s]);
score.multiplyAllFrom(s);
}
}
return score ?? Score.empty;
return score?.toScore() ?? Score.empty;
}

/// Search using the parsed [word] and return the matching packages with scores
/// as a new [Score] instance.
/// as a new [IndexedScore] instance.
///
/// When [filterOnPackages] is present, only the names present are evaluated.
Score searchWord(
/// When [filterOnNonZeros] is present, only the indexes with an already
/// non-zero value are evaluated.
IndexedScore searchWord(
String word, {
Iterable<String>? filterOnPackages,
IndexedScore? filterOnNonZeros,
}) {
final pkgNamesToCheck = filterOnPackages ?? _packageNames;
final values = <String, double>{};
final score = IndexedScore(_packageNames);
final singularWord = word.length <= 3 || !word.endsWith('s')
? word
: word.substring(0, word.length - 1);
final collapsedWord = _collapseName(singularWord);
final parts =
collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord);
for (final pkg in pkgNamesToCheck) {
final entry = _data[pkg];
if (entry == null) {
for (var i = 0; i < _data.length; i++) {
if (filterOnNonZeros?.isNotPositive(i) ?? false) {
continue;
}

final entry = _data[i];
if (entry.collapsed.contains(collapsedWord)) {
values[pkg] = 1.0;
score.setValue(i, 1.0);
continue;
}
var matched = 0;
Expand All @@ -567,11 +557,11 @@ class PackageNameIndex {
if (matched > 0) {
final v = matched / parts.length;
if (v >= 0.5) {
values[pkg] = v;
score.setValue(i, v);
}
}
}
return Score(values);
return score;
}
}

Expand Down
42 changes: 42 additions & 0 deletions app/lib/search/token_index.dart
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,24 @@ class TokenIndex {
}
return Score.multiply(scores);
}

/// Looks up the tokens matching [word] and folds their weighted scores into
/// [score] in place.
///
/// Each candidate value is computed as the token's match weight multiplied by
/// the per-entry weight stored for that token and by [weight]. An entry of
/// [score] is updated through [IndexedScore.setValueMaxOf], which keeps the
/// maximum of its current value and the candidate.
///
/// [score] is expected to have the same length as this index (checked with
/// an assertion only).
void searchAndAccumulate(
  String word, {
  double weight = 1.0,
  required IndexedScore score,
}) {
  assert(score.length == _length);
  final matches = lookupTokens(word);
  for (final matchedToken in matches.tokens) {
    final tokenMatchWeight = matches[matchedToken]!;
    final weightsByIndex = _inverseIds[matchedToken]!;
    for (final entry in weightsByIndex.entries) {
      // Keep the multiplication order unchanged so that the floating point
      // results stay identical.
      final candidate = tokenMatchWeight * entry.value * weight;
      score.setValueMaxOf(entry.key, candidate);
    }
  }
}
}

/// Mutable score list that can be accessed via integer index.
Expand All @@ -285,6 +303,10 @@ class IndexedScore {
return _values[index] <= 0.0;
}

/// Overwrites the value stored at [index] with [value].
void setValue(int index, double value) => _values[index] = value;

/// Stores at [index] the larger of the value already present and [value].
void setValueMaxOf(int index, double value) {
  final current = _values[index];
  _values[index] = math.max(current, value);
}
Expand All @@ -307,6 +329,15 @@ class IndexedScore {
}
}

/// Multiplies every value of this instance, in place, with the value at the
/// same index in [other].
///
/// Entries that are already `0.0` are left untouched, and a `0.0` factor in
/// [other] sets the entry to `0.0` directly instead of multiplying.
///
/// [other] is expected to have the same number of values as this instance
/// (checked with an assertion only).
void multiplyAllFrom(IndexedScore other) {
  assert(other._values.length == _values.length);
  final factors = other._values;
  for (var index = 0; index < _values.length; index++) {
    final current = _values[index];
    if (current == 0.0) {
      // Nothing to scale: the entry stays zero whatever the factor is.
      continue;
    }
    final factor = factors[index];
    if (factor == 0.0) {
      _values[index] = 0.0;
    } else {
      _values[index] = current * factor;
    }
  }
}

Set<String> toKeySet() {
final set = <String>{};
for (var i = 0; i < _values.length; i++) {
Expand All @@ -317,4 +348,15 @@ class IndexedScore {
}
return set;
}

/// Builds a [Score] from the entries with a strictly positive value, mapping
/// the key at each index to the value stored at that index.
///
/// Entries whose value is not greater than `0.0` are omitted.
Score toScore() {
  final positives = <String, double>{
    for (var index = 0; index < _values.length; index++)
      if (_values[index] > 0.0) _keys[index]: _values[index],
  };
  return Score._(positives);
}
}
Loading