From 96aeda11734efa38deee94f7efe847e6a44a124e Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Fri, 5 Sep 2025 09:46:48 +0200 Subject: [PATCH 1/3] Use retrievers in search query --- .../Adapters/Search/ElasticsearchGateway.cs | 120 +++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 0f14f4f2f..e1245f64d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -67,7 +67,125 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => - await ExactSearchAsync(query, pageNumber, pageSize, ctx); + await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx); + + public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) + { + _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); + + var searchQuery = query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + + try + { + var response = await _client.SearchAsync(s => s + .Indices(_elasticsearchOptions.IndexName) + .Retriever(r => r + .Rrf(rrf => rrf + .Retrievers( + // Lexical/Traditional search retriever + ret => ret.Standard(std => std + .Query(q => q + .Bool(b => b + .Should( + // Tier 1: Exact/Prefix matches (highest priority) + sh => sh.Prefix(p => p + .Field("title.keyword") + .Value(searchQuery) + .CaseInsensitive(true) + .Boost(10.0f) // Highest importance - exact prefix matches + ), + // Tier 2: Title matching with AND operator + sh => sh.Match(m => m + .Field(f => f.Title) + .Query(searchQuery) + .Operator(Operator.And) + .Boost(8.0f) // High importance - all terms must match + ), + // Tier 3: Match bool prefix for partial matches + sh => sh.MatchBoolPrefix(m => m + .Field(f => f.Title) + .Query(searchQuery) + .Boost(6.0f) // Medium-high importance - partial matches + ), + // Tier 4: Abstract matching + sh => sh.Match(m => m + .Field(f => f.Abstract) + .Query(searchQuery) + .Boost(4.0f) // Medium importance - content matching + ), + // Tier 5: Parent matching + sh => sh.Match(m => m + .Field("parents.title") + .Query(searchQuery) + .Boost(2.0f) // Lower importance - parent context + ), + // Tier 6: Fuzzy fallback + sh => sh.Match(m => m + .Field(f => f.Title) + .Query(searchQuery) + .Fuzziness(1) + .Boost(1.0f) // Lowest importance - fuzzy fallback + ) + ) + .MustNot(mn => mn.Terms(t => t + .Field("url.keyword") + .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/")) + )) + .MinimumShouldMatch(1) + ) + ) + ), + // Semantic search retriever + ret => ret.Standard(std => std + .Query(q => q + .Bool(b => b + .Should( + // Title semantic search + sh => sh.Semantic(sem => sem + .Field("title.semantic_text") + .Query(searchQuery) + .Boost(5.0f) // Higher importance - title semantic matching + ), + // Abstract semantic search + sh => sh.Semantic(sem => sem + .Field("abstract") + .Query(searchQuery) + .Boost(3.0f) // Medium importance - content semantic matching + ) + ) + .MustNot(mn => mn.Terms(t => t + .Field("url.keyword") + .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/")) + )) + .MinimumShouldMatch(1) + ) + ) + ) + ) + .RankConstant(60) // Controls how much weight is given to document ranking + ) + ) + .From((pageNumber - 1) * pageSize) + .Size(pageSize), ctx); + + if (!response.IsValidResponse) + { + _logger.LogWarning("Elasticsearch RRF search response was not valid. Reason: {Reason}", + response.ElasticsearchServerError?.Error?.Reason ?? "Unknown"); + } + else + { + _logger.LogInformation("RRF search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total); + } + + return ProcessSearchResponse(response); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error occurred during Elasticsearch RRF search for '{Query}'", query); + throw; + } + } public async Task<(int TotalHits, List Results)> ExactSearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) { From c9472fb894a5bd073d91b0fa6d55119c8e2e4ffd Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Mon, 8 Sep 2025 14:39:23 +0200 Subject: [PATCH 2/3] Use boolean query DSL (#1851) --- .../Adapters/Search/ElasticsearchGateway.cs | 97 ++++--------------- 1 file changed, 20 insertions(+), 77 deletions(-) diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index e1245f64d..41a94d96b 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -75,6 +75,24 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } + || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Boost = 4.0f } + || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) + ; + var semanticSearchRetriever = + ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } + || new SemanticQuery("abstract", searchQuery) { Boost = 3.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), + new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) + ; + try { var response = await _client.SearchAsync(s => s @@ -83,84 +101,9 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger rrf .Retrievers( // Lexical/Traditional search retriever - ret => ret.Standard(std => std - .Query(q => q - .Bool(b => b - .Should( - // Tier 1: Exact/Prefix matches (highest priority) - sh => sh.Prefix(p => p - .Field("title.keyword") - .Value(searchQuery) - .CaseInsensitive(true) - .Boost(10.0f) // Highest importance - exact prefix matches - ), - // Tier 2: Title matching with AND operator - sh => sh.Match(m => m - .Field(f => f.Title) - .Query(searchQuery) - .Operator(Operator.And) - .Boost(8.0f) // High importance - all terms must match - ), - // Tier 3: Match bool prefix for partial matches - sh => sh.MatchBoolPrefix(m => m - .Field(f => f.Title) - .Query(searchQuery) - .Boost(6.0f) // Medium-high importance - partial matches - ), - // Tier 4: Abstract matching - sh => sh.Match(m => m - .Field(f => f.Abstract) - .Query(searchQuery) - .Boost(4.0f) // Medium importance - content matching - ), - // Tier 5: Parent matching - sh => sh.Match(m => m - .Field("parents.title") - .Query(searchQuery) - .Boost(2.0f) // Lower importance - parent context - ), - // Tier 6: Fuzzy fallback - sh => sh.Match(m => m - .Field(f => f.Title) - .Query(searchQuery) - .Fuzziness(1) - .Boost(1.0f) // Lowest importance - fuzzy fallback - ) - ) - .MustNot(mn => mn.Terms(t => t - .Field("url.keyword") - .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/")) - )) - .MinimumShouldMatch(1) - ) - ) - ), + ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), // Semantic search retriever - ret => ret.Standard(std => std - .Query(q => q - .Bool(b => b - .Should( - // Title semantic search - sh => sh.Semantic(sem => sem - .Field("title.semantic_text") - .Query(searchQuery) - .Boost(5.0f) // Higher importance - title semantic matching - ), - // Abstract semantic search - sh => sh.Semantic(sem => sem - .Field("abstract") - .Query(searchQuery) - .Boost(3.0f) // Medium importance - content semantic matching - ) - ) - .MustNot(mn => mn.Terms(t => t - .Field("url.keyword") - .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/")) - )) - .MinimumShouldMatch(1) - ) - ) - ) + ret => ret.Standard(std => std.Query(semanticSearchRetriever)) ) .RankConstant(60) // Controls how much weight is given to document ranking ) From adedf60b3f348797f4a1a1da86f859f2f2ae6fd0 Mon Sep 17 00:00:00 2001 From: Jan Calanog Date: Mon, 8 Sep 2025 14:53:39 +0200 Subject: [PATCH 3/3] Update src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs --- .../Adapters/Search/ElasticsearchGateway.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 41a94d96b..7a3134a80 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -117,9 +117,7 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger