From 0e9068627661d7bb372c8840699ac2b2592426b5 Mon Sep 17 00:00:00 2001 From: Istvan Soos Date: Wed, 2 Apr 2025 10:53:15 +0200 Subject: [PATCH] Slightly lower score for depluralized matches. --- app/lib/search/mem_index.dart | 41 +++++++++++++++----- app/test/search/maps_test.dart | 2 +- app/test/search/mem_index_test.dart | 2 +- app/test/search/package_name_index_test.dart | 16 ++++++++ 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/app/lib/search/mem_index.dart b/app/lib/search/mem_index.dart index 364dd3eec5..1979272882 100644 --- a/app/lib/search/mem_index.dart +++ b/app/lib/search/mem_index.dart @@ -619,11 +619,20 @@ class PackageNameIndex { IndexedScore? filterOnNonZeros, }) { assert(score.keys.length == _packageNames.length); - final singularWord = word.length <= 3 || !word.endsWith('s') - ? word - : word.substring(0, word.length - 1); - final lowercasedWord = singularWord.toLowerCase(); + final lowercasedWord = word.toLowerCase(); final collapsedWord = _removeUnderscores(lowercasedWord); + + // Note: This is a very simple plurality check, which may not work for many + // cases, but provides a simple approximation. + // The check should be done on the lowercased input value. + final lowercaseDepluralized = + lowercasedWord.length <= 3 || !lowercasedWord.endsWith('s') + ? null + : lowercasedWord.substring(0, lowercasedWord.length - 1); + final collapsedDepluralized = lowercaseDepluralized == null + ? null + : _removeUnderscores(lowercaseDepluralized); + final parts = collapsedWord.length <= 3 ? [collapsedWord] : trigrams(collapsedWord); for (var i = 0; i < _data.length; i++) { @@ -632,18 +641,24 @@ class PackageNameIndex { } final entry = _data[i]; - if (entry.collapsed.length >= collapsedWord.length && - entry.collapsed.contains(collapsedWord)) { - // also check for non-collapsed match - if (entry.lowercased.length >= lowercasedWord.length && - entry.lowercased.contains(lowercasedWord)) { + // Check for direct substring match. 
+ // TODO: Consider using trie or other substring index here. + if (entry._containsCollapsed(collapsedDepluralized ?? collapsedWord)) { + // most score for original non-collapsed match + if (entry._containsLowercased(lowercasedWord)) { score.setValue(i, 1.0); continue; } + // otherwise a slightly lower score for: + // - collapsed-only original match + // - non-collapsed depluralized match + // - collapsed depluralized match score.setValue(i, 0.99); + continue; } + var matched = 0; var unmatched = 0; final acceptThreshold = parts.length ~/ 2; @@ -682,6 +697,14 @@ class _PkgNameData { final Set trigrams; _PkgNameData(this.lowercased, this.collapsed, this.trigrams); + + bool _containsLowercased(String value) { + return lowercased.length >= value.length && lowercased.contains(value); + } + + bool _containsCollapsed(String value) { + return collapsed.length >= value.length && collapsed.contains(value); + } } extension on List { diff --git a/app/test/search/maps_test.dart b/app/test/search/maps_test.dart index af69e3fcc1..5ebbb9e66c 100644 --- a/app/test/search/maps_test.dart +++ b/app/test/search/maps_test.dart @@ -30,7 +30,7 @@ void main() { 'sdkLibraryHits': [], 'packageHits': [ {'package': 'maps', 'score': 1.0}, - {'package': 'map', 'score': 1.0}, + {'package': 'map', 'score': 0.99}, ], }); }); diff --git a/app/test/search/mem_index_test.dart b/app/test/search/mem_index_test.dart index 1e751e7c0b..71119f1e93 100644 --- a/app/test/search/mem_index_test.dart +++ b/app/test/search/mem_index_test.dart @@ -562,8 +562,8 @@ server.dart adds a small, prescriptive server (PicoServer) that can be configure final match2 = index.search( ServiceSearchQuery.parse(query: 'apps', order: SearchOrder.text)); expect(match2.packageHits.map((e) => e.toJson()), [ - {'package': 'app', 'score': 1.0}, {'package': 'apps', 'score': 1.0}, + {'package': 'app', 'score': 0.99}, ]); }); diff --git a/app/test/search/package_name_index_test.dart b/app/test/search/package_name_index_test.dart index 
2ec47ed645..198fb88def 100644 --- a/app/test/search/package_name_index_test.dart +++ b/app/test/search/package_name_index_test.dart @@ -127,4 +127,20 @@ void main() { }); }); }); + + group('redis', () { + final index = PackageNameIndex([ + 'redis', + 'x_redis_client', + 'credit_union', + ]); + + test('redis', () { + expect(index.search('redis'), { + 'redis': 1.0, + 'x_redis_client': 1.0, + 'credit_union': 0.99, + }); + }); + }); }