diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 0f14f4f2f..7a3134a80 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -67,7 +67,66 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => - await ExactSearchAsync(query, pageNumber, pageSize, ctx); + await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx); + + public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) + { + _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); + + var searchQuery = query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + + var lexicalSearchRetriever = + ((Query)new PrefixQuery(Infer.Field(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } + || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Boost = 4.0f } + || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } + || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) + ; + var semanticSearchRetriever = + ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } + || new SemanticQuery("abstract", searchQuery) { Boost = 3.0f } + ) + && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), + new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) + ; + + try + { + var response = await _client.SearchAsync(s => s + .Indices(_elasticsearchOptions.IndexName) + .Retriever(r => r + .Rrf(rrf => rrf + .Retrievers( + // Lexical/Traditional search retriever + ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), + // Semantic search retriever + ret => ret.Standard(std => std.Query(semanticSearchRetriever)) + ) + .RankConstant(60) // Controls how much weight is given to document ranking + ) + ) + .From((pageNumber - 1) * pageSize) + .Size(pageSize), ctx); + + if (!response.IsValidResponse) + { + _logger.LogWarning("Elasticsearch RRF search response was not valid. Reason: {Reason}", + response.ElasticsearchServerError?.Error?.Reason ?? "Unknown"); + } + else + _logger.LogInformation("RRF search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total); + + return ProcessSearchResponse(response); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error occurred during Elasticsearch RRF search for '{Query}'", query); + throw; + } + } public async Task<(int TotalHits, List Results)> ExactSearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) {