From b64a334a4106d759b91936491155b9ecc986d282 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Thu, 27 Nov 2025 13:31:09 +0100 Subject: [PATCH 1/5] New query for search with updated relevance tests --- .editorconfig | 5 +- config/synonyms.yml | 4 +- docs-builder.slnx | 1 + ...astic.Documentation.ServiceDefaults.csproj | 1 - .../Search/DocumentationDocument.cs | 7 + .../Elasticsearch/ElasticsearchExporter.cs | 39 ++- .../ElasticsearchMarkdownExporter.cs | 8 + .../Search/SearchUsecase.cs | 1 - .../Adapters/Search/ElasticsearchGateway.cs | 216 ++++++++----- .../Aws/LocalParameterProvider.cs | 21 +- .../Elastic.Documentation.Api.Lambda.csproj | 1 - .../Search/SearchBootstrapFixture.cs | 283 ++++++++++++++++++ .../Search/SearchRelevanceTests.cs | 56 ++-- .../Search/SearchTestBase.cs | 279 ----------------- .../Search.IntegrationTests.csproj | 14 + .../SearchRelevanceTests.cs | 224 ++++++++++++++ 16 files changed, 757 insertions(+), 403 deletions(-) create mode 100644 tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs create mode 100644 tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj create mode 100644 tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs diff --git a/.editorconfig b/.editorconfig index 2216430f8..78054a2f1 100644 --- a/.editorconfig +++ b/.editorconfig @@ -172,7 +172,7 @@ csharp_new_line_before_open_brace = all csharp_new_line_before_else = true csharp_new_line_before_catch = true csharp_new_line_before_finally = true -csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_object_initializers = false # just a suggestion do to our JSON tests that use anonymous types to # represent json quite a bit (makes copy paste easier). 
@@ -190,6 +190,9 @@ csharp_space_between_method_call_parameter_list_parentheses = false #Wrap csharp_preserve_single_line_statements = false csharp_preserve_single_line_blocks = true +csharp_place_simple_initializer_on_single_line = true +csharp_max_initializer_elements_on_line = 5 +csharp_wrap_object_and_collection_initializer_style = wrap_if_long resharper_wrap_object_and_collection_initializer_style = chop_always # Resharper diff --git a/config/synonyms.yml b/config/synonyms.yml index a8707dbe6..79403ec52 100644 --- a/config/synonyms.yml +++ b/config/synonyms.yml @@ -29,4 +29,6 @@ synonyms: - [ "ecs", "elastic common schema" ] - [ "ml", "machine learning" ] - [ "eis", "elastic inference service" ] - - [ "traffic filter", "network security" ] \ No newline at end of file + - [ "traffic filter", "network security" ] + - [ "sso", "single sign-on" ] + - [ "querydsl", "query dsl", "query dsl"] diff --git a/docs-builder.slnx b/docs-builder.slnx index 89bab2d9b..4d030b6ef 100644 --- a/docs-builder.slnx +++ b/docs-builder.slnx @@ -78,6 +78,7 @@ + diff --git a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj index 155e8056c..4357d65ce 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj +++ b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj @@ -8,7 +8,6 @@ - diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index 78207d7a5..af2185e43 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -38,6 +38,13 @@ public record DocumentationDocument [JsonPropertyName("hash")] public string Hash { get; set; } = string.Empty; + /// + /// Search title is a combination of the title and the url components. 
+ /// This is used for querying to not reward documents with short titles contributing too heavily to scoring + /// + [JsonPropertyName("search_title")] + public string? SearchTitle { get; set; } + [JsonPropertyName("title")] public string? Title { get; set; } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs index 42a06ad81..7dc869c7d 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs @@ -151,7 +151,8 @@ protected static string CreateMappingSetting(string synonymSetName) => "tokenizer": "group_tokenizer", "filter": [ "lowercase", - "synonyms_filter" + "synonyms_filter", + "kstem" ] }, "highlight_analyzer": { @@ -228,13 +229,19 @@ protected static string CreateMapping(string? inferenceId) => } }, "hash" : { "type" : "keyword" }, + "search_title": { + "type": "text", + "search_analyzer": "synonyms_analyzer", + "fields": { + "completion": { "type": "search_as_you_type" } + } + }, "title": { "type": "text", "search_analyzer": "synonyms_analyzer", "fields": { - "keyword": { - "type": "keyword" - } + "keyword": { "type": "keyword" }, + "completion": { "type": "search_as_you_type", "search_analyzer": "synonyms_analyzer" } {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}} } }, @@ -252,34 +259,24 @@ protected static string CreateMapping(string? inferenceId) => "headings": { "type": "text", "search_analyzer": "synonyms_analyzer" + }, + "abstract": { + "type" : "text", + "search_analyzer": "synonyms_analyzer", + "fields" : { + {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} + } } - {{(!string.IsNullOrWhiteSpace(inferenceId) ? 
AbstractInferenceMapping(inferenceId) : AbstractMapping())}} } } """; - private static string AbstractMapping() => - """ - , "abstract": { - "type": "text", - "search_analyzer": "synonyms_analyzer" - } - """; - private static string InferenceMapping(string inferenceId) => $""" "type": "semantic_text", "inference_id": "{inferenceId}" """; - private static string AbstractInferenceMapping(string inferenceId) => - // langugage=json - $$""" - , "abstract": { - {{InferenceMapping(inferenceId)}} - } - """; - public void Dispose() { diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index b793fc89c..42f7162bc 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -400,6 +400,12 @@ private void AssignDocumentMetadata(DocumentationDocument doc) doc.BatchIndexDate = _batchIndexDate; } + private void CommonEnrichments(DocumentationDocument doc) + { + var urlComponents = doc.Url.Split('/'); + doc.SearchTitle = $"{doc.Title} - ({string.Join(" ", urlComponents)})"; + } + public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx) { var file = fileContext.SourceFile; @@ -455,6 +461,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, }; AssignDocumentMetadata(doc); + CommonEnrichments(doc); if (_indexStrategy == IngestStrategy.Multiplex) return await _lexicalChannel.TryWrite(doc, ctx) && await _semanticChannel.TryWrite(doc, ctx); @@ -489,6 +496,7 @@ public async ValueTask FinishExportAsync(IDirectoryInfo outputFolder, Canc : string.Empty; doc.Abstract = @abstract; doc.Headings = headings; + CommonEnrichments(doc); // Write to channels following the multiplex or reindex strategy if (_indexStrategy == IngestStrategy.Multiplex) diff --git 
a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs index 4c696c820..20d6ed6f9 100644 --- a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs +++ b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs @@ -2,7 +2,6 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System.Diagnostics; using Microsoft.Extensions.Logging; namespace Elastic.Documentation.Api.Core.Search; diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 6d943a884..8542f83e0 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -19,6 +19,9 @@ internal sealed record DocumentDto [JsonPropertyName("title")] public required string Title { get; init; } + [JsonPropertyName("search_title")] + public required string SearchTitle { get; init; } + [JsonPropertyName("url")] public required string Url { get; init; } @@ -86,46 +89,88 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger /// Builds the lexical search query for the given search term. 
/// - private static Query BuildLexicalQuery(string searchQuery) => - ((Query)new PrefixQuery(Infer.Field(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } - || new MatchPhrasePrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 9.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } - || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.And, Boost = 5.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.Or, Boost = 4.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])); + private static Query BuildLexicalQuery(string searchQuery) + { + var tokens = searchQuery.Split(" "); + if (tokens is ["datastream" or "datastreams" or "data-stream" or "data-streams"]) + { + // /docs/api/doc/kibana/operation/operation-delete-fleet-epm-packages-pkgname-pkgversion-datastream-assets + // Is the only page that uses "datastream" instead of "data streams" this gives it an N of 1 in the entire corpus + // which is hard to fix through tweaking boosting, should update the page to use "data streams" instead + searchQuery = "data streams"; + 
tokens = ["data", "streams"]; + } + + var query = + (Query)new MultiMatchQuery + { + Query = searchQuery, Operator = Operator.And, Type = TextQueryType.BoolPrefix, + Analyzer = "synonyms_analyzer", + Boost = 2.0f, + Fields = new[] + { + "search_title.completion", + "search_title.completion._2gram", + "search_title.completion._3gram" + } + } + || new MultiMatchQuery + { + Query = searchQuery, Operator = Operator.And, Type = TextQueryType.BestFields, + Analyzer = "synonyms_analyzer", + Boost = 0.2f, + Fields = new[] + { + "stripped_body" + } + }; + // If the search term is a single word, boost the URL match + // This is to ensure that URLs that contain the search term are ranked higher than URLs that don't + // We dampen the boost by wrapping it in a constant score query + // This allows a query for `templates` which is an overloaded term to yield pages that contain `templates` in the URL + if (tokens.Length == 1) + { + query |= new ConstantScoreQuery + { + Filter = new MatchQuery + { + Field = Infer.Field(f => f.Url.Suffix("match")), + Query = searchQuery + }, + Boost = 1 + }; + } + + return new BoostingQuery + { + Positive = query, + NegativeBoost = 0.8, + Negative = new MultiMatchQuery + { + Query = "plugin client integration", Operator = Operator.Or, Fields = new[] { "search_title", "headings", "url.match" } + } + }; + } /// /// Builds the semantic search query for the given search term. 
/// private static Query BuildSemanticQuery(string searchQuery) => - ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } - || new SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), - new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])); + (Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } + || new SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f }; - /// - /// Normalizes the search query by replacing "dotnet" with "net". - /// - private static string NormalizeSearchQuery(string query) => - query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + private static Query BuildFilter() => !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), + new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])); - public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) + public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, + Cancel ctx = default) { _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); const string preTag = ""; const string postTag = ""; - var searchQuery = NormalizeSearchQuery(query); + var searchQuery = query; var lexicalSearchRetriever = BuildLexicalQuery(searchQuery); var semanticSearchRetriever = BuildSemanticQuery(searchQuery); @@ -133,52 +178,71 @@ private static string NormalizeSearchQuery(string query) => { var response = await _client.SearchAsync(s => s .Indices(_elasticsearchOptions.IndexName) - .Retriever(r => r - .Rrf(rrf => rrf - .Retrievers( - // Lexical/Traditional search retriever - ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), - // Semantic search retriever - ret => 
ret.Standard(std => std.Query(semanticSearchRetriever)) + .From(Math.Max(pageNumber - 1, 0) * pageSize) + .Size(pageSize) + .PostFilter(BuildFilter()) + .Query(BuildLexicalQuery(query)) + // .Retriever(r => r + // .Rrf(rrf => rrf + // .Filter(BuildFilter()) + // .Retrievers( + // // Lexical/Traditional search retriever + // ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), + // // Semantic search retriever + // ret => ret.Standard(std => std.Query(semanticSearchRetriever)) + // ) + // .RankConstant(60) // Controls how much weight is given to document ranking + // .RankWindowSize(100) + // ) + // ) + .Source(sf => sf + .Filter(f => f + .Includes( + e => e.Type, + e => e.Title, + e => e.SearchTitle, + e => e.Url, + e => e.Description, + e => e.Parents, + e => e.Headings ) - .RankConstant(60) // Controls how much weight is given to document ranking - .RankWindowSize(100) ) ) - .From((pageNumber - 1) * pageSize) - .Size(pageSize) - .Source(sf => sf - .Filter(f => f - .Includes( - e => e.Type, - e => e.Title, - e => e.Url, - e => e.Description, - e => e.Parents, - e => e.Headings - ) - ) - ) - .Highlight(h => h - .RequireFieldMatch(true) - .Fields(f => f - .Add(Infer.Field(d => d.StrippedBody), hf => hf - .FragmentSize(150) - .NumberOfFragments(3) - .NoMatchSize(150) - .BoundaryChars(":.!?\t\n") - .BoundaryScanner(BoundaryScanner.Sentence) - .BoundaryMaxScan(15) - .FragmentOffset(0) - .HighlightQuery(q => q.Match(m => m - .Field(d => d.StrippedBody) - .Query(searchQuery) - .Analyzer("highlight_analyzer") - )) - .PreTags(preTag) - .PostTags(postTag)) - ) - ), ctx); + .Highlight(h => h + .RequireFieldMatch(true) + .Fields(f => f + .Add(Infer.Field(d => d.Title), hf => hf + .FragmentSize(150) + .NumberOfFragments(3) + .NoMatchSize(150) + .BoundaryChars(":.!?\t\n") + .BoundaryScanner(BoundaryScanner.Sentence) + .BoundaryMaxScan(15) + .FragmentOffset(0) + .HighlightQuery(q => q.Match(m => m + .Field(d => d.Title) + .Query(searchQuery) + 
.Analyzer("highlight_analyzer") + )) + .PreTags(preTag) + .PostTags(postTag)) + .Add(Infer.Field(d => d.StrippedBody), hf => hf + .FragmentSize(150) + .NumberOfFragments(3) + .NoMatchSize(150) + .BoundaryChars(":.!?\t\n") + .BoundaryScanner(BoundaryScanner.Sentence) + .BoundaryMaxScan(15) + .FragmentOffset(0) + .HighlightQuery(q => q.Match(m => m + .Field(d => d.StrippedBody) + .Query(searchQuery) + .Analyzer("highlight_analyzer") + )) + .PreTags(preTag) + .PostTags(postTag)) + ) + ), ctx); if (!response.IsValidResponse) { @@ -240,14 +304,14 @@ private static (int TotalHits, List Results) ProcessSearchResp /// public async Task ExplainDocumentAsync(string query, string documentUrl, Cancel ctx = default) { - var searchQuery = NormalizeSearchQuery(query); + var searchQuery = query; var lexicalQuery = BuildLexicalQuery(searchQuery); - var semanticQuery = BuildSemanticQuery(searchQuery); + //var semanticQuery = BuildSemanticQuery(searchQuery); // Combine queries with bool should to match RRF behavior var combinedQuery = (Query)new BoolQuery { - Should = [lexicalQuery, semanticQuery], + Should = [lexicalQuery], MinimumShouldMatch = 1 }; @@ -263,6 +327,7 @@ public async Task ExplainDocumentAsync(string query, string docum { return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = false, Explanation = $"Document with URL '{documentUrl}' not found in index" @@ -279,6 +344,7 @@ public async Task ExplainDocumentAsync(string query, string docum { return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = true, Matched = false, @@ -289,6 +355,7 @@ public async Task ExplainDocumentAsync(string query, string docum return new ExplainResult { DocumentUrl = documentUrl, + SearchTitle = getDocResponse.Documents.First().SearchTitle, Found = true, Matched = explainResponse.Matched, Score = explainResponse.Explanation?.Value ?? 
0, @@ -300,6 +367,7 @@ public async Task ExplainDocumentAsync(string query, string docum _logger.LogError(ex, "Error explaining document '{Url}' for query '{Query}'", documentUrl, query); return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = false, Explanation = $"Exception during explain: {ex.Message}" @@ -346,6 +414,7 @@ private static string FormatExplanation(Elastic.Clients.Elasticsearch.Core.Expla { var emptyResult = new ExplainResult { + SearchTitle = "N/A", DocumentUrl = "N/A", Found = false, Explanation = "No search results returned" @@ -366,6 +435,7 @@ private static string FormatExplanation(Elastic.Clients.Elasticsearch.Core.Expla /// public sealed record ExplainResult { + public required string SearchTitle { get; init; } public required string DocumentUrl { get; init; } public bool Found { get; init; } public bool Matched { get; init; } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs index 24afadb08..b1bbfe64d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs @@ -2,10 +2,27 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using Microsoft.Extensions.Configuration; + namespace Elastic.Documentation.Api.Infrastructure.Aws; public class LocalParameterProvider : IParameterProvider { + private readonly string? _elasticUrlFromSecret; + private readonly string? 
_elasticApiKeyFromSecret; + + public LocalParameterProvider() + { + // Build a new ConfigurationBuilder to read user secrets + var configBuilder = new ConfigurationBuilder(); + _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + _elasticUrlFromSecret = userSecretsConfig["Parameters:DocumentationElasticUrl"]; + _elasticApiKeyFromSecret = userSecretsConfig["Parameters:DocumentationElasticApiKey"]; + + } + public async Task GetParam(string name, bool withDecryption = true, Cancel ctx = default) { switch (name) @@ -24,11 +41,11 @@ public async Task GetParam(string name, bool withDecryption = true, Canc } case "docs-elasticsearch-url": { - return GetEnv("DOCUMENTATION_ELASTIC_URL"); + return GetEnv("DOCUMENTATION_ELASTIC_URL", _elasticUrlFromSecret); } case "docs-elasticsearch-apikey": { - return GetEnv("DOCUMENTATION_ELASTIC_APIKEY"); + return GetEnv("DOCUMENTATION_ELASTIC_APIKEY", _elasticApiKeyFromSecret); } case "docs-kibana-url": { diff --git a/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj b/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj index b209a46ba..061b59867 100644 --- a/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj +++ b/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj @@ -21,7 +21,6 @@ - diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs new file mode 100644 index 000000000..8438bd58f --- /dev/null +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -0,0 +1,283 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Aspire.Hosting.ApplicationModel; +using Aspire.Hosting.Testing; +using Documentation.Builder.Diagnostics.Console; +using Elastic.Documentation.Aspire; +using Elastic.Documentation.Configuration; +using Elastic.Ingest.Elasticsearch; +using Elastic.Markdown.Exporters.Elasticsearch; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; +using FluentAssertions; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; + +namespace Elastic.Assembler.IntegrationTests.Search; + +[CollectionDefinition(Collection)] +public class SearchBootstrapFixture(DocumentationFixture fixture) : IAsyncLifetime +{ + public const string Collection = "Search"; + public HttpClient HttpClient { get; private set; } = null!; + public bool Connected { get; private set; } + + /// + /// Initializes the test by ensuring AssemblerServe (which hosts the API) is healthy and Elasticsearch is indexed. + /// Checks if the remote Elasticsearch already has up-to-date data to avoid unnecessary indexing. + /// + public async ValueTask InitializeAsync() + { + try + { + // Wait for AssemblerServe to be ready (it hosts the embedded Lambda API) + Console.WriteLine("Waiting for AssemblerServe (with embedded API) to become healthy..."); + await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceHealthyAsync(ResourceNames.AssemblerServe, cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(2), TestContext.Current.CancellationToken); + + Console.WriteLine("AssemblerServe is healthy. 
Creating HTTP client..."); + + // Get the HTTP client for AssemblerServe which includes the API endpoints + HttpClient = fixture.DistributedApplication.CreateHttpClient(ResourceNames.AssemblerServe, "http"); + HttpClient.Should().NotBeNull("Should be able to create HTTP client for AssemblerServe"); + + // Check if Elasticsearch already has up-to-date data + var indexingNeeded = await IsIndexingNeeded(); + + if (!Connected) + { + Console.WriteLine("Can not connect to Elasticsearch. Skipping indexing."); + return; + } + + if (!indexingNeeded) + { + Console.WriteLine("Elasticsearch already has up-to-date data. Skipping indexing."); + return; + } + + Console.WriteLine("Elasticsearch needs indexing. Manually starting indexer..."); + + // The indexer always has WithExplicitStart(), so we must manually start it + // Get the ResourceLoggerService to send the start command + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Get the resource notification service to find the resource + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Wait for the resource to be available + var resourceEvent = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ResourceNames.ElasticsearchIngest, _ => true, TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + + // Get the resource instance + var resource = resourceEvent.Resource; + + // Execute the start command using ResourceCommandAnnotation + var startCommand = resource.Annotations.OfType() + .FirstOrDefault(a => a.Name == "resource-start"); + + if (startCommand != null) + { + Console.WriteLine($"Executing start command for {ResourceNames.ElasticsearchIngest}..."); + + // Create ExecuteCommandContext for the start command + var commandContext = new ExecuteCommandContext + { + ResourceName = resourceEvent.ResourceId, + ServiceProvider = fixture.DistributedApplication.Services, + CancellationToken = 
TestContext.Current.CancellationToken + }; + + await startCommand.ExecuteCommand(commandContext); + Console.WriteLine($"Start command executed for {ResourceNames.ElasticsearchIngest}"); + } + else + { + throw new Exception($"Could not find start command for {ResourceNames.ElasticsearchIngest}"); + } + + Console.WriteLine("Waiting for indexer to complete..."); + + // Wait for the indexer to complete + _ = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ResourceNames.ElasticsearchIngest, KnownResourceStates.TerminalStates, + cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(10), TestContext.Current.CancellationToken); + + Console.WriteLine("Elasticsearch indexer reached terminal state. Validating exit code..."); + + // Validate the indexer completed successfully + await ValidateResourceExitCode(ResourceNames.ElasticsearchIngest); + + Console.WriteLine("Elasticsearch indexing completed successfully. Tests can now run."); + } + catch (Exception e) + { + Console.WriteLine($"Failed to initialize test: {e.Message}"); + Console.WriteLine(string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse())); + throw; + } + } + + /// + /// Checks if indexing is needed by comparing the channel hash in Elasticsearch + /// with the current semantic exporter channel hash. + /// Uses the same pattern as ElasticsearchMarkdownExporter. 
+ /// + private async ValueTask IsIndexingNeeded() + { + try + { + // Get Elasticsearch configuration from Aspire + var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); + + if (string.IsNullOrEmpty(elasticsearchUrl)) + { + Console.WriteLine("No Elasticsearch URL configured, indexing will be skipped."); + Connected = false; + return false; + } + + Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); + + // Create Elasticsearch endpoint configuration + var endpoint = new ElasticsearchEndpoint + { + Uri = new Uri(elasticsearchUrl), + ApiKey = apiKey, + Username = username, + Password = password + }; + + // Create transport configuration (similar to ElasticsearchMarkdownExporter) + var configuration = new ElasticsearchConfiguration(endpoint.Uri) + { + Authentication = endpoint.ApiKey is { } eApiKey + ? new ApiKey(eApiKey) + : endpoint is { Username: { } eUsername, Password: { } ePassword } + ? new BasicAuthentication(eUsername, ePassword) + : null, + EnableHttpCompression = true + }; + + var transport = new DistributedTransport(configuration); + Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; + + // Create a logger factory and diagnostics collector + var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); + var collector = new ConsoleDiagnosticsCollector(loggerFactory); + + // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) + using var semanticExporter = new ElasticsearchSemanticExporter( + loggerFactory, + collector, + endpoint, + "dev", // index namespace + transport + ); + + // Get the current hash from Elasticsearch index template + var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? 
string.Empty; + + // Get the expected channel hash from the semantic exporter + await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); + var expectedSemanticHash = semanticExporter.Channel.ChannelHash; + + Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); + Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); + + // If hashes match, no indexing needed + if (!string.IsNullOrEmpty(currentSemanticHash) && currentSemanticHash == expectedSemanticHash) + { + Console.WriteLine("Semantic channel hashes match. Skipping indexing."); + return false; + } + + Console.WriteLine("Semantic channel hashes do not match or remote hash is empty. Indexing needed."); + return true; + } + catch (Exception ex) + { + Console.WriteLine($"Error checking Elasticsearch state: {ex.Message}. Will proceed with indexing."); + return true; // If we can't check, safer to index + } + } + + private async ValueTask ValidateResourceExitCode(string resourceName) + { + var eventResource = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(resourceName, _ => true); + var id = eventResource.ResourceId; + + if (!fixture.DistributedApplication.ResourceNotifications.TryGetCurrentState(id, out var state)) + throw new Exception($"Could not find {resourceName} in the current state"); + + if (state.Snapshot.ExitCode is not 0) + { + var recentLogs = string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(100).Reverse()); + throw new Exception( + $"Exit code should be 0 for {resourceName}, but was {state.Snapshot.ExitCode}. Recent logs:{Environment.NewLine}{recentLogs}"); + } + + Console.WriteLine($"{resourceName} completed with exit code 0"); + } + + /// + /// Gets Elasticsearch configuration from Aspire parameters and environment. + /// Manually reads user secrets from the aspire project, then falls back to environment variables. 
+ /// + private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() + { + // Manually read user secrets from the aspire project + // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 + var configBuilder = new ConfigurationBuilder(); + configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + // Get URL - try user secrets first, then Aspire configuration, then environment + var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); + + // Get API Key - try user secrets first, then Aspire configuration, then environment + var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); + + // Get password for local Elasticsearch (when using --start-elasticsearch) + var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); + + // Get username (defaults to "elastic") + var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") + ?? 
"elastic"; + + Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); + + return (url, apiKey, password, username); + } + + public ValueTask DisposeAsync() + { + HttpClient?.Dispose(); + + // Only dump logs if test failed + if (TestContext.Current.TestState?.Result is not TestResult.Passed) + { + foreach (var log in fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse()) + Console.WriteLine(log.Message); + } + + GC.SuppressFinalize(this); + return default; + } +} diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs index 4247b459d..4d9d666fa 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs @@ -9,6 +9,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; namespace Elastic.Assembler.IntegrationTests.Search; @@ -17,37 +18,25 @@ namespace Elastic.Assembler.IntegrationTests.Search; /// to provide detailed explanations of search results using Elasticsearch's _explain API. /// These tests help understand and improve search ranking by showing detailed scoring breakdowns. /// -[Collection(SearchBootstrapFixture.Collection)] -public class SearchRelevanceTests(SearchBootstrapFixture searchFixture, DocumentationFixture documentationFixture, ITestOutputHelper output) : SearchTestBase +public class SearchRelevanceTests(ITestOutputHelper output) { /// /// Theory data for search queries mapped to expected first hit URLs. /// Same as SearchIntegrationTests but with detailed explain output on failures. 
/// - public static TheoryData SearchQueryTestCases => new() + public static TheoryData SearchQueryTestCases => new() { //TODO these results reflect today's result, we still have some work to do to improve the relevance of the search results // Elasticsearch specific queries - { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started" }, - { "apm", "/docs/reference/apm/observability/apm" }, - { "kibana dashboard", "/docs/reference/beats/auditbeat/configuration-dashboards" }, - - // .NET specific queries (testing dotnet -> net replacement) - { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet/using-net-client" }, - { ".net apm agent", "/docs/reference/apm/agents/dotnet" }, - - // General queries - { "machine learning", "/docs/reference/machine-learning" }, - { "ingest pipeline", "/docs/reference/beats/metricbeat/configuring-ingest-node" }, + { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started", null }, + { "elastic common schema", "/docs/reference/ecs", null }, }; [Theory] [MemberData(nameof(SearchQueryTestCases))] - public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl) + public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl, string[]? 
additionalExpectedUrls) { - Assert.SkipUnless(searchFixture.Connected, "Elasticsearch is not connected"); - // Arrange - Create ElasticsearchGateway directly var gateway = CreateElasticsearchGateway(); @@ -118,6 +107,32 @@ public async Task SearchReturnsExpectedFirstResultWithExplain(string query, stri output.WriteLine($"✅ First result matches expected: {actualFirstResultUrl}"); output.WriteLine($" Score: {results.First().Score:F4}"); } + + // Check for additional expected URLs if provided + if (additionalExpectedUrls?.Length > 0) + { + output.WriteLine($"\nChecking for {additionalExpectedUrls.Length} additional expected URLs on first page..."); + var resultUrls = results.Select(r => r.Url).ToList(); + + foreach (var expectedUrl in additionalExpectedUrls) + { + if (resultUrls.Contains(expectedUrl)) + { + var position = resultUrls.IndexOf(expectedUrl) + 1; + output.WriteLine($"✅ Found expected URL at position {position}: {expectedUrl}"); + } + else + { + output.WriteLine($"❌ Expected URL not found on first page: {expectedUrl}"); + output.WriteLine($" First page results ({results.Count}):"); + for (var i = 0; i < results.Count; i++) + { + output.WriteLine($" {i + 1}. 
{results[i].Url} (score: {results[i].Score:F4})"); + } + resultUrls.Should().Contain(expectedUrl, $"Expected URL '{expectedUrl}' should be present on the first page of results for query '{query}'"); + } + } + } } [Fact] @@ -160,9 +175,6 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() /// private ElasticsearchGateway CreateElasticsearchGateway() { - var configuration = documentationFixture.DistributedApplication.Services.GetRequiredService(); - var loggerFactory = documentationFixture.DistributedApplication.Services.GetRequiredService(); - // Build a new ConfigurationBuilder to read user secrets var configBuilder = new ConfigurationBuilder(); configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); @@ -170,12 +182,10 @@ private ElasticsearchGateway CreateElasticsearchGateway() // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? configuration["Parameters:DocumentationElasticUrl"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") ?? throw new InvalidOperationException("Elasticsearch URL not configured"); var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? configuration["Parameters:DocumentationElasticApiKey"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") ?? 
throw new InvalidOperationException("Elasticsearch API key not configured"); @@ -183,7 +193,7 @@ private ElasticsearchGateway CreateElasticsearchGateway() var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); var options = new ElasticsearchOptions(parameterProvider); - return new ElasticsearchGateway(options, loggerFactory.CreateLogger()); + return new ElasticsearchGateway(options, NullLogger.Instance); } /// diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs index 2dae77bbb..2b45d165d 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs @@ -2,287 +2,8 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using Aspire.Hosting.ApplicationModel; -using Aspire.Hosting.Testing; -using Documentation.Builder.Diagnostics.Console; -using Elastic.Documentation.Configuration; -using Elastic.Ingest.Elasticsearch; -using Elastic.Markdown.Exporters.Elasticsearch; -using Elastic.Transport; -using Elastic.Transport.Products.Elasticsearch; -using FluentAssertions; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; -using static Elastic.Documentation.Aspire.ResourceNames; - namespace Elastic.Assembler.IntegrationTests.Search; - -[CollectionDefinition(Collection)] -public class SearchBootstrapFixture(DocumentationFixture fixture) : IAsyncLifetime -{ - public const string Collection = "Search"; - public HttpClient HttpClient { get; private set; } = null!; - public bool Connected { get; private set; } - - /// - /// Initializes the test by ensuring AssemblerServe (which hosts the API) is healthy and Elasticsearch is 
indexed. - /// Checks if the remote Elasticsearch already has up-to-date data to avoid unnecessary indexing. - /// - public async ValueTask InitializeAsync() - { - try - { - // Wait for AssemblerServe to be ready (it hosts the embedded Lambda API) - Console.WriteLine("Waiting for AssemblerServe (with embedded API) to become healthy..."); - await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceHealthyAsync(AssemblerServe, cancellationToken: TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(2), TestContext.Current.CancellationToken); - - Console.WriteLine("AssemblerServe is healthy. Creating HTTP client..."); - - // Get the HTTP client for AssemblerServe which includes the API endpoints - HttpClient = fixture.DistributedApplication.CreateHttpClient(AssemblerServe, "http"); - HttpClient.Should().NotBeNull("Should be able to create HTTP client for AssemblerServe"); - - // Check if Elasticsearch already has up-to-date data - var indexingNeeded = await IsIndexingNeeded(); - - if (!Connected) - { - Console.WriteLine("Can not connect to Elasticsearch. Skipping indexing."); - return; - } - - if (!indexingNeeded) - { - Console.WriteLine("Elasticsearch already has up-to-date data. Skipping indexing."); - return; - } - - Console.WriteLine("Elasticsearch needs indexing. 
Manually starting indexer..."); - - // The indexer always has WithExplicitStart(), so we must manually start it - // Get the ResourceLoggerService to send the start command - fixture.DistributedApplication.Services - .GetRequiredService(); - - // Get the resource notification service to find the resource - fixture.DistributedApplication.Services - .GetRequiredService(); - - // Wait for the resource to be available - var resourceEvent = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(ElasticsearchIngest, _ => true, TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); - - // Get the resource instance - var resource = resourceEvent.Resource; - - // Execute the start command using ResourceCommandAnnotation - var startCommand = resource.Annotations.OfType() - .FirstOrDefault(a => a.Name == "resource-start"); - - if (startCommand != null) - { - Console.WriteLine($"Executing start command for {ElasticsearchIngest}..."); - - // Create ExecuteCommandContext for the start command - var commandContext = new ExecuteCommandContext - { - ResourceName = resourceEvent.ResourceId, - ServiceProvider = fixture.DistributedApplication.Services, - CancellationToken = TestContext.Current.CancellationToken - }; - - await startCommand.ExecuteCommand(commandContext); - Console.WriteLine($"Start command executed for {ElasticsearchIngest}"); - } - else - { - throw new Exception($"Could not find start command for {ElasticsearchIngest}"); - } - - Console.WriteLine("Waiting for indexer to complete..."); - - // Wait for the indexer to complete - _ = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(ElasticsearchIngest, KnownResourceStates.TerminalStates, - cancellationToken: TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(10), TestContext.Current.CancellationToken); - - Console.WriteLine("Elasticsearch indexer reached terminal state. 
Validating exit code..."); - - // Validate the indexer completed successfully - await ValidateResourceExitCode(ElasticsearchIngest); - - Console.WriteLine("Elasticsearch indexing completed successfully. Tests can now run."); - } - catch (Exception e) - { - Console.WriteLine($"Failed to initialize test: {e.Message}"); - Console.WriteLine(string.Join(Environment.NewLine, - fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse())); - throw; - } - } - - /// - /// Checks if indexing is needed by comparing the channel hash in Elasticsearch - /// with the current semantic exporter channel hash. - /// Uses the same pattern as ElasticsearchMarkdownExporter. - /// - private async ValueTask IsIndexingNeeded() - { - try - { - // Get Elasticsearch configuration from Aspire - var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); - - if (string.IsNullOrEmpty(elasticsearchUrl)) - { - Console.WriteLine("No Elasticsearch URL configured, indexing will be performed."); - Connected = false; - return false; - } - - Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); - - // Create Elasticsearch endpoint configuration - var endpoint = new ElasticsearchEndpoint - { - Uri = new Uri(elasticsearchUrl), - ApiKey = apiKey, - Username = username, - Password = password - }; - - // Create transport configuration (similar to ElasticsearchMarkdownExporter) - var configuration = new ElasticsearchConfiguration(endpoint.Uri) - { - Authentication = endpoint.ApiKey is { } eApiKey - ? new ApiKey(eApiKey) - : endpoint is { Username: { } eUsername, Password: { } ePassword } - ? 
new BasicAuthentication(eUsername, ePassword) - : null, - EnableHttpCompression = true - }; - - var transport = new DistributedTransport(configuration); - Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; - - // Create a logger factory and diagnostics collector - var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); - var collector = new ConsoleDiagnosticsCollector(loggerFactory); - - // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) - using var semanticExporter = new ElasticsearchSemanticExporter( - loggerFactory, - collector, - endpoint, - "dev", // index namespace - transport - ); - - // Get the current hash from Elasticsearch index template - var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? string.Empty; - - // Get the expected channel hash from the semantic exporter - await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); - var expectedSemanticHash = semanticExporter.Channel.ChannelHash; - - Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); - Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); - - // If hashes match, no indexing needed - if (!string.IsNullOrEmpty(currentSemanticHash) && currentSemanticHash == expectedSemanticHash) - { - Console.WriteLine("Semantic channel hashes match. Skipping indexing."); - return false; - } - - Console.WriteLine("Semantic channel hashes do not match or remote hash is empty. Indexing needed."); - return true; - } - catch (Exception ex) - { - Console.WriteLine($"Error checking Elasticsearch state: {ex.Message}. 
Will proceed with indexing."); - return true; // If we can't check, safer to index - } - } - - private async ValueTask ValidateResourceExitCode(string resourceName) - { - var eventResource = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(resourceName, _ => true); - var id = eventResource.ResourceId; - - if (!fixture.DistributedApplication.ResourceNotifications.TryGetCurrentState(id, out var state)) - throw new Exception($"Could not find {resourceName} in the current state"); - - if (state.Snapshot.ExitCode is not 0) - { - var recentLogs = string.Join(Environment.NewLine, - fixture.InMemoryLogger.RecordedLogs.Reverse().Take(100).Reverse()); - throw new Exception( - $"Exit code should be 0 for {resourceName}, but was {state.Snapshot.ExitCode}. Recent logs:{Environment.NewLine}{recentLogs}"); - } - - Console.WriteLine($"{resourceName} completed with exit code 0"); - } - - /// - /// Gets Elasticsearch configuration from Aspire parameters and environment. - /// Manually reads user secrets from the aspire project, then falls back to environment variables. - /// - private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() - { - // Manually read user secrets from the aspire project - // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - - // Get URL - try user secrets first, then Aspire configuration, then environment - var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] - ?? 
Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); - - // Get API Key - try user secrets first, then Aspire configuration, then environment - var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - - // Get password for local Elasticsearch (when using --start-elasticsearch) - var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); - - // Get username (defaults to "elastic") - var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") - ?? "elastic"; - - Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); - - return (url, apiKey, password, username); - } - - public ValueTask DisposeAsync() - { - HttpClient?.Dispose(); - - // Only dump logs if test failed - if (TestContext.Current.TestState?.Result is not TestResult.Passed) - { - foreach (var log in fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse()) - Console.WriteLine(log.Message); - } - - GC.SuppressFinalize(this); - return default; - } -} /// /// Base class for search integration tests that handles initialization /// and conditional Elasticsearch indexing based on hash comparison. 
diff --git a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj new file mode 100644 index 000000000..0691c1282 --- /dev/null +++ b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj @@ -0,0 +1,14 @@ + + + + net10.0 + enable + enable + + + + + + + + diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs new file mode 100644 index 000000000..90b1305b1 --- /dev/null +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -0,0 +1,224 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using Elastic.Documentation.Api.Infrastructure.Adapters.Search; +using Elastic.Documentation.Api.Infrastructure.Aws; +using FluentAssertions; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Logging.Abstractions; + +namespace Search.IntegrationTests; + +/// +/// Integration tests for search relevance that use ElasticsearchGateway directly +/// to provide detailed explanations of search results using Elasticsearch's _explain API. +/// These tests help understand and improve search ranking by showing detailed scoring breakdowns. +/// +public class SearchRelevanceTests(ITestOutputHelper output) +{ + /// + /// Theory data for search queries mapped to expected first hit URLs. + /// Same as SearchIntegrationTests but with detailed explain output on failures. 
+ /// + public static TheoryData SearchQueryTestCases => new() + { + //TODO these results reflect today's result, we still have some work to do to improve the relevance of the search results + + // Elasticsearch specific queries + { "elasticsearch get started", "/docs/solutions/search/get-started", null }, + { "elasticsearch getting started", "/docs/solutions/search/get-started", null }, + { "elastic common schema", "/docs/reference/ecs", null }, + { "ecs", "/docs/reference/ecs", null }, + { "c# client", "/docs/reference/elasticsearch/clients/dotnet", null }, + { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet", null }, + { "runscript", "/docs/api/doc/kibana/operation/operation-runscriptaction", [ "/docs/solutions/security/endpoint-response-actions" ] }, + { "data-streams", "/docs/manage-data/data-store/data-streams", null }, + { "datastream", "/docs/manage-data/data-store/data-streams", null }, + { "data stream", "/docs/manage-data/data-store/data-streams", null }, + { "saml sso", "/docs/deploy-manage/users-roles/cloud-organization/register-elastic-cloud-saml-in-okta", ["/docs/deploy-manage/users-roles/cloud-organization/configure-saml-authentication"] }, + { "templates", "/docs/manage-data/data-store/templates", null}, + { "query dsl", "/docs/explore-analyze/query-filter/languages/querydsl", null}, + { "querydsl", "/docs/explore-analyze/query-filter/languages/querydsl", null} + }; + + [Theory] + [MemberData(nameof(SearchQueryTestCases))] + public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl, string[]? 
additionalExpectedUrls) + { + // Arrange - Create ElasticsearchGateway directly + var gateway = CreateElasticsearchGateway(); + + // Act - Perform the search + var (totalHits, results) = await gateway.HybridSearchWithRrfAsync(query, 1, 5, TestContext.Current.CancellationToken); + + // Log basic results + output.WriteLine($"Query: {query}"); + output.WriteLine($"Total hits: {totalHits}"); + output.WriteLine($"Results returned: {results.Count}"); + + results.Should().NotBeEmpty($"Search for '{query}' should return results"); + + var actualFirstResultUrl = results.First().Url; + + // If the first result doesn't match expectations, use _explain API for detailed analysis + if (actualFirstResultUrl != expectedFirstResultUrl) + { + output.WriteLine("\n❌ FIRST RESULT MISMATCH - Fetching detailed explanations...\n"); + + // Get explain for both the actual top result and the expected result + var (topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( + query, + expectedFirstResultUrl, + TestContext.Current.CancellationToken); + + // Output the actual top result explanation + output.WriteLine("═══════════════════════════════════════════════════════════════"); + output.WriteLine($"ACTUAL TOP RESULT: {topResultExplain.DocumentUrl}"); + output.WriteLine($"Search Title: {topResultExplain.SearchTitle}"); + output.WriteLine($"Score: {topResultExplain.Score:F4}"); + output.WriteLine($"Matched: {topResultExplain.Matched}"); + output.WriteLine("───────────────────────────────────────────────────────────────"); + output.WriteLine("Scoring Breakdown:"); + output.WriteLine(topResultExplain.Explanation); + + // Output the expected result explanation + output.WriteLine("═══════════════════════════════════════════════════════════════"); + output.WriteLine($"EXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); + output.WriteLine($"Search Title: {expectedResultExplain.SearchTitle}"); + output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); + 
output.WriteLine($"Matched: {expectedResultExplain.Matched}"); + output.WriteLine("───────────────────────────────────────────────────────────────"); + output.WriteLine("Scoring Breakdown:"); + output.WriteLine(expectedResultExplain.Explanation); + output.WriteLine("═══════════════════════════════════════════════════════════════\n"); + + // Create a detailed failure message + var scoreDiff = topResultExplain.Score - expectedResultExplain.Score; + var failureMessage = $@" +First result for query '{query}' did not match expectation. + +Expected: {expectedFirstResultUrl} + - Score: {expectedResultExplain.Score:F4} + - Matched: {expectedResultExplain.Matched} + +Actual: {actualFirstResultUrl} + - Score: {topResultExplain.Score:F4} + - Matched: {topResultExplain.Matched} + +Score Difference: {scoreDiff:F4} (actual is {(scoreDiff > 0 ? "higher" : "lower")}) + +See test output above for detailed scoring breakdowns from Elasticsearch's _explain API. +"; + + actualFirstResultUrl.Should().Be(expectedFirstResultUrl, failureMessage); + } + else + { + output.WriteLine($"✅ First result matches expected: {actualFirstResultUrl}"); + output.WriteLine($" Score: {results.First().Score:F4}"); + } + + // Check for additional expected URLs if provided + if (additionalExpectedUrls?.Length > 0) + { + output.WriteLine($"\nChecking for {additionalExpectedUrls.Length} additional expected URLs on first page..."); + var resultUrls = results.Select(r => r.Url).ToList(); + + foreach (var expectedUrl in additionalExpectedUrls) + { + if (resultUrls.Contains(expectedUrl)) + { + var position = resultUrls.IndexOf(expectedUrl) + 1; + output.WriteLine($"✅ Found expected URL at position {position}: {expectedUrl}"); + } + else + { + output.WriteLine($"❌ Expected URL not found on first page: {expectedUrl}"); + output.WriteLine($" First page results ({results.Count}):"); + for (var i = 0; i < results.Count; i++) + { + output.WriteLine($" {i + 1}. 
{results[i].Url} (score: {results[i].Score:F4})"); + } + resultUrls.Should().Contain(expectedUrl, $"Expected URL '{expectedUrl}' should be present on the first page of results for query '{query}'"); + } + } + } + } + + [Fact] + public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() + { + // Arrange + var gateway = CreateElasticsearchGateway(); + const string query = "elasticsearch getting started"; + const string expectedUrl = "/docs/reference/elasticsearch/clients/java/getting-started"; + + // Act - Use the ExplainTopResultAndExpectedAsync method which gets top result and explains both + var (topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( + query, + expectedUrl, + TestContext.Current.CancellationToken); + + // Assert - Top result should have explanation + output.WriteLine($"Query: {query}"); + output.WriteLine($"\nTOP RESULT: {topResultExplain.DocumentUrl}"); + output.WriteLine($"Found: {topResultExplain.Found}"); + output.WriteLine($"Matched: {topResultExplain.Matched}"); + output.WriteLine($"Score: {topResultExplain.Score:F4}"); + output.WriteLine("Explanation:"); + output.WriteLine(topResultExplain.Explanation); + + output.WriteLine($"\nEXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); + output.WriteLine($"Found: {expectedResultExplain.Found}"); + output.WriteLine($"Matched: {expectedResultExplain.Matched}"); + output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); + output.WriteLine("Explanation:"); + output.WriteLine(expectedResultExplain.Explanation); + + // Both results should have explanations (even if scores are different) + topResultExplain.Explanation.Should().NotBeEmpty("Top result should have an explanation"); + expectedResultExplain.Explanation.Should().NotBeEmpty("Expected result should have an explanation"); + } + + /// + /// Creates an ElasticsearchGateway instance using configuration from the distributed application. 
+ /// + private ElasticsearchGateway CreateElasticsearchGateway() + { + // Build a new ConfigurationBuilder to read user secrets + var configBuilder = new ConfigurationBuilder(); + configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment + var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") + ?? throw new InvalidOperationException("Elasticsearch URL not configured"); + + var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") + ?? throw new InvalidOperationException("Elasticsearch API key not configured"); + + // Create a test parameter provider with the configuration values + var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); + var options = new ElasticsearchOptions(parameterProvider); + + return new ElasticsearchGateway(options, NullLogger.Instance); + } + + /// + /// Simple test implementation of IParameterProvider that returns configured values. 
+ /// + private sealed class TestParameterProvider(string url, string apiKey, string indexName) : IParameterProvider + { + public Task GetParam(string name, bool withDecryption = true, Cancel ctx = default) => + name switch + { + "docs-elasticsearch-url" => Task.FromResult(url), + "docs-elasticsearch-apikey" => Task.FromResult(apiKey), + "docs-elasticsearch-index" => Task.FromResult(indexName), + _ => throw new ArgumentException($"Parameter '{name}' not configured in test provider") + }; + } +} From 8cd7411d4780d3e5c2ca6e9291418631846883c4 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Thu, 27 Nov 2025 13:43:09 +0100 Subject: [PATCH 2/5] SkipUnless connected to Elasticsearch --- .../Adapters/Search/ElasticsearchGateway.cs | 2 ++ .../Search.IntegrationTests/SearchRelevanceTests.cs | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 8542f83e0..b28df4f90 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -83,6 +83,8 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger CanConnect(Cancel ctx) => (await _client.PingAsync(ctx)).IsValidResponse; + public async Task<(int TotalHits, List Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx); diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 90b1305b1..902e5dfbc 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -48,6 +48,8 @@ public async Task 
SearchReturnsExpectedFirstResultWithExplain(string query, stri { // Arrange - Create ElasticsearchGateway directly var gateway = CreateElasticsearchGateway(); + var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); // Act - Perform the search var (totalHits, results) = await gateway.HybridSearchWithRrfAsync(query, 1, 5, TestContext.Current.CancellationToken); @@ -151,6 +153,9 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() { // Arrange var gateway = CreateElasticsearchGateway(); + var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); + const string query = "elasticsearch getting started"; const string expectedUrl = "/docs/reference/elasticsearch/clients/java/getting-started"; From 3e231339a22cacccd72cd58d62b1c77f1a85270c Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Thu, 27 Nov 2025 16:52:34 +0100 Subject: [PATCH 3/5] Delete SearchRelevanceTests from Assembler.IntegrationTests now lives under Search.IntegrationTests --- .../Search/SearchRelevanceTests.cs | 213 ------------------ 1 file changed, 213 deletions(-) delete mode 100644 tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs deleted file mode 100644 index 4d9d666fa..000000000 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs +++ /dev/null @@ -1,213 +0,0 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
-// See the LICENSE file in the project root for more information - -using Elastic.Documentation.Api.Infrastructure.Adapters.Search; -using Elastic.Documentation.Api.Infrastructure.Aws; -using Elastic.Documentation.Configuration; -using FluentAssertions; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Logging.Abstractions; - -namespace Elastic.Assembler.IntegrationTests.Search; - -/// -/// Integration tests for search relevance that use ElasticsearchGateway directly -/// to provide detailed explanations of search results using Elasticsearch's _explain API. -/// These tests help understand and improve search ranking by showing detailed scoring breakdowns. -/// -public class SearchRelevanceTests(ITestOutputHelper output) -{ - /// - /// Theory data for search queries mapped to expected first hit URLs. - /// Same as SearchIntegrationTests but with detailed explain output on failures. - /// - public static TheoryData SearchQueryTestCases => new() - { - //TODO these results reflect today's result, we still have some work to do to improve the relevance of the search results - - // Elasticsearch specific queries - { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started", null }, - { "elastic common schema", "/docs/reference/ecs", null }, - }; - - [Theory] - [MemberData(nameof(SearchQueryTestCases))] - public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl, string[]? 
additionalExpectedUrls) - { - // Arrange - Create ElasticsearchGateway directly - var gateway = CreateElasticsearchGateway(); - - // Act - Perform the search - var (totalHits, results) = await gateway.HybridSearchWithRrfAsync(query, 1, 5, TestContext.Current.CancellationToken); - - // Log basic results - output.WriteLine($"Query: {query}"); - output.WriteLine($"Total hits: {totalHits}"); - output.WriteLine($"Results returned: {results.Count}"); - - results.Should().NotBeEmpty($"Search for '{query}' should return results"); - - var actualFirstResultUrl = results.First().Url; - - // If the first result doesn't match expectations, use _explain API for detailed analysis - if (actualFirstResultUrl != expectedFirstResultUrl) - { - output.WriteLine("\n❌ FIRST RESULT MISMATCH - Fetching detailed explanations...\n"); - - // Get explain for both the actual top result and the expected result - var (topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( - query, - expectedFirstResultUrl, - TestContext.Current.CancellationToken); - - // Output the actual top result explanation - output.WriteLine("═══════════════════════════════════════════════════════════════"); - output.WriteLine($"ACTUAL TOP RESULT: {topResultExplain.DocumentUrl}"); - output.WriteLine($"Score: {topResultExplain.Score:F4}"); - output.WriteLine($"Matched: {topResultExplain.Matched}"); - output.WriteLine("───────────────────────────────────────────────────────────────"); - output.WriteLine("Scoring Breakdown:"); - output.WriteLine(topResultExplain.Explanation); - - // Output the expected result explanation - output.WriteLine("═══════════════════════════════════════════════════════════════"); - output.WriteLine($"EXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); - output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); - output.WriteLine($"Matched: {expectedResultExplain.Matched}"); - 
output.WriteLine("───────────────────────────────────────────────────────────────"); - output.WriteLine("Scoring Breakdown:"); - output.WriteLine(expectedResultExplain.Explanation); - output.WriteLine("═══════════════════════════════════════════════════════════════\n"); - - // Create a detailed failure message - var scoreDiff = topResultExplain.Score - expectedResultExplain.Score; - var failureMessage = $@" -First result for query '{query}' did not match expectation. - -Expected: {expectedFirstResultUrl} - - Score: {expectedResultExplain.Score:F4} - - Matched: {expectedResultExplain.Matched} - -Actual: {actualFirstResultUrl} - - Score: {topResultExplain.Score:F4} - - Matched: {topResultExplain.Matched} - -Score Difference: {scoreDiff:F4} (actual is {(scoreDiff > 0 ? "higher" : "lower")}) - -See test output above for detailed scoring breakdowns from Elasticsearch's _explain API. -"; - - actualFirstResultUrl.Should().Be(expectedFirstResultUrl, failureMessage); - } - else - { - output.WriteLine($"✅ First result matches expected: {actualFirstResultUrl}"); - output.WriteLine($" Score: {results.First().Score:F4}"); - } - - // Check for additional expected URLs if provided - if (additionalExpectedUrls?.Length > 0) - { - output.WriteLine($"\nChecking for {additionalExpectedUrls.Length} additional expected URLs on first page..."); - var resultUrls = results.Select(r => r.Url).ToList(); - - foreach (var expectedUrl in additionalExpectedUrls) - { - if (resultUrls.Contains(expectedUrl)) - { - var position = resultUrls.IndexOf(expectedUrl) + 1; - output.WriteLine($"✅ Found expected URL at position {position}: {expectedUrl}"); - } - else - { - output.WriteLine($"❌ Expected URL not found on first page: {expectedUrl}"); - output.WriteLine($" First page results ({results.Count}):"); - for (var i = 0; i < results.Count; i++) - { - output.WriteLine($" {i + 1}. 
{results[i].Url} (score: {results[i].Score:F4})"); - } - resultUrls.Should().Contain(expectedUrl, $"Expected URL '{expectedUrl}' should be present on the first page of results for query '{query}'"); - } - } - } - } - - [Fact] - public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() - { - // Arrange - var gateway = CreateElasticsearchGateway(); - const string query = "elasticsearch getting started"; - const string expectedUrl = "/docs/reference/elasticsearch/clients/java/getting-started"; - - // Act - Use the ExplainTopResultAndExpectedAsync method which gets top result and explains both - var (topResultExplain, expectedResultExplain) = await gateway.ExplainTopResultAndExpectedAsync( - query, - expectedUrl, - TestContext.Current.CancellationToken); - - // Assert - Top result should have explanation - output.WriteLine($"Query: {query}"); - output.WriteLine($"\nTOP RESULT: {topResultExplain.DocumentUrl}"); - output.WriteLine($"Found: {topResultExplain.Found}"); - output.WriteLine($"Matched: {topResultExplain.Matched}"); - output.WriteLine($"Score: {topResultExplain.Score:F4}"); - output.WriteLine("Explanation:"); - output.WriteLine(topResultExplain.Explanation); - - output.WriteLine($"\nEXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); - output.WriteLine($"Found: {expectedResultExplain.Found}"); - output.WriteLine($"Matched: {expectedResultExplain.Matched}"); - output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); - output.WriteLine("Explanation:"); - output.WriteLine(expectedResultExplain.Explanation); - - // Both results should have explanations (even if scores are different) - topResultExplain.Explanation.Should().NotBeEmpty("Top result should have an explanation"); - expectedResultExplain.Explanation.Should().NotBeEmpty("Expected result should have an explanation"); - } - - /// - /// Creates an ElasticsearchGateway instance using configuration from the distributed application. 
- /// - private ElasticsearchGateway CreateElasticsearchGateway() - { - // Build a new ConfigurationBuilder to read user secrets - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - - // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment - var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") - ?? throw new InvalidOperationException("Elasticsearch URL not configured"); - - var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") - ?? throw new InvalidOperationException("Elasticsearch API key not configured"); - - // Create a test parameter provider with the configuration values - var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); - var options = new ElasticsearchOptions(parameterProvider); - - return new ElasticsearchGateway(options, NullLogger.Instance); - } - - /// - /// Simple test implementation of IParameterProvider that returns configured values. 
- /// - private sealed class TestParameterProvider(string url, string apiKey, string indexName) : IParameterProvider - { - public Task GetParam(string name, bool withDecryption = true, Cancel ctx = default) => - name switch - { - "docs-elasticsearch-url" => Task.FromResult(url), - "docs-elasticsearch-apikey" => Task.FromResult(apiKey), - "docs-elasticsearch-index" => Task.FromResult(indexName), - _ => throw new ArgumentException($"Parameter '{name}' not configured in test provider") - }; - } -} From 149908ab9fd039bc9c4f4849e8277c60475de8a8 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Fri, 28 Nov 2025 10:00:47 +0100 Subject: [PATCH 4/5] update SkipUnless --- .editorconfig | 9 +++++++++ .../SearchRelevanceTests.cs | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.editorconfig b/.editorconfig index 78054a2f1..15a894a5c 100644 --- a/.editorconfig +++ b/.editorconfig @@ -151,6 +151,15 @@ csharp_style_expression_bodied_accessors = true:suggestion csharp_style_expression_bodied_local_functions = when_on_single_line:error dotnet_style_prefer_conditional_expression_over_return = false +csharp_alignment_tab_fill_style=optimal_fill +csharp_align_multiline_parameter=false +csharp_align_multiline_extends_list=false +csharp_align_multiline_array_and_object_initializer=false +csharp_align_multiline_switch_expression=false +csharp_align_multiline_property_pattern=false +csharp_align_multiline_list_pattern=false +indent_braces_inside_statement_conditions=false + # Suggest more modern language features when available csharp_style_pattern_matching_over_is_with_cast_check = true:error diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 902e5dfbc..614d91ebf 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -48,6 +48,7 @@ public async Task 
SearchReturnsExpectedFirstResultWithExplain(string query, stri { // Arrange - Create ElasticsearchGateway directly var gateway = CreateElasticsearchGateway(); + Assert.SkipUnless(gateway is not null, "Elasticsearch is not connected"); var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); @@ -153,6 +154,7 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() { // Arrange var gateway = CreateElasticsearchGateway(); + Assert.SkipUnless(gateway is not null, "Elasticsearch is not connected"); var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); @@ -189,7 +191,7 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() /// /// Creates an ElasticsearchGateway instance using configuration from the distributed application. /// - private ElasticsearchGateway CreateElasticsearchGateway() + private ElasticsearchGateway? CreateElasticsearchGateway() { // Build a new ConfigurationBuilder to read user secrets var configBuilder = new ConfigurationBuilder(); @@ -197,14 +199,18 @@ private ElasticsearchGateway CreateElasticsearchGateway() var userSecretsConfig = configBuilder.Build(); // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment - var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") - ?? throw new InvalidOperationException("Elasticsearch URL not configured"); + var elasticsearchUrl = + userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); - var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + var elasticsearchApiKey = + userSecretsConfig["Parameters:DocumentationElasticApiKey"] ?? 
Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") ?? throw new InvalidOperationException("Elasticsearch API key not configured"); + if (elasticsearchUrl is null or "") + return null; + // Create a test parameter provider with the configuration values var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); var options = new ElasticsearchOptions(parameterProvider); From efc245a57c46a3e529af9d5ded8bc74f674ac1d7 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Fri, 28 Nov 2025 12:46:32 +0100 Subject: [PATCH 5/5] update SkipUnless --- .../Search.IntegrationTests/SearchRelevanceTests.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 614d91ebf..cd0007ff1 100644 --- a/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -205,10 +205,9 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") - ?? throw new InvalidOperationException("Elasticsearch API key not configured"); + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - if (elasticsearchUrl is null or "") + if (elasticsearchUrl is null or "" || elasticsearchApiKey is null or "") return null; // Create a test parameter provider with the configuration values