diff --git a/.editorconfig b/.editorconfig index 2216430f8..15a894a5c 100644 --- a/.editorconfig +++ b/.editorconfig @@ -151,6 +151,15 @@ csharp_style_expression_bodied_accessors = true:suggestion csharp_style_expression_bodied_local_functions = when_on_single_line:error dotnet_style_prefer_conditional_expression_over_return = false +csharp_alignment_tab_fill_style=optimal_fill +csharp_align_multiline_parameter=false +csharp_align_multiline_extends_list=false +csharp_align_multiline_array_and_object_initializer=false +csharp_align_multiline_switch_expression=false +csharp_align_multiline_property_pattern=false +csharp_align_multiline_list_pattern=false +indent_braces_inside_statement_conditions=false + # Suggest more modern language features when available csharp_style_pattern_matching_over_is_with_cast_check = true:error @@ -172,7 +181,7 @@ csharp_new_line_before_open_brace = all csharp_new_line_before_else = true csharp_new_line_before_catch = true csharp_new_line_before_finally = true -csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_object_initializers = false # just a suggestion do to our JSON tests that use anonymous types to # represent json quite a bit (makes copy paste easier). 
@@ -190,6 +199,9 @@ csharp_space_between_method_call_parameter_list_parentheses = false #Wrap csharp_preserve_single_line_statements = false csharp_preserve_single_line_blocks = true +csharp_place_simple_initializer_on_single_line = true +csharp_max_initializer_elements_on_line = 5 +csharp_wrap_object_and_collection_initializer_style = wrap_if_long resharper_wrap_object_and_collection_initializer_style = chop_always # Resharper diff --git a/config/synonyms.yml b/config/synonyms.yml index a8707dbe6..79403ec52 100644 --- a/config/synonyms.yml +++ b/config/synonyms.yml @@ -29,4 +29,6 @@ synonyms: - [ "ecs", "elastic common schema" ] - [ "ml", "machine learning" ] - [ "eis", "elastic inference service" ] - - [ "traffic filter", "network security" ] \ No newline at end of file + - [ "traffic filter", "network security" ] + - [ "sso", "single sign-on" ] + - [ "querydsl", "query dsl", "query dsl"] diff --git a/docs-builder.slnx b/docs-builder.slnx index 89bab2d9b..4d030b6ef 100644 --- a/docs-builder.slnx +++ b/docs-builder.slnx @@ -78,6 +78,7 @@ + diff --git a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj index 155e8056c..4357d65ce 100644 --- a/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj +++ b/src/Elastic.Documentation.ServiceDefaults/Elastic.Documentation.ServiceDefaults.csproj @@ -8,7 +8,6 @@ - diff --git a/src/Elastic.Documentation/Search/DocumentationDocument.cs b/src/Elastic.Documentation/Search/DocumentationDocument.cs index e4af64538..3ca50c854 100644 --- a/src/Elastic.Documentation/Search/DocumentationDocument.cs +++ b/src/Elastic.Documentation/Search/DocumentationDocument.cs @@ -38,6 +38,13 @@ public record DocumentationDocument [JsonPropertyName("hash")] public string Hash { get; set; } = string.Empty; + /// + /// Search title is a combination of the title and the url components. 
+ /// This is used for querying to not reward documents with short titles contributing too heavily to scoring + /// + [JsonPropertyName("search_title")] + public string? SearchTitle { get; set; } + [JsonPropertyName("title")] public string? Title { get; set; } diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs index 6f94a3678..ae8c7b517 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs @@ -151,7 +151,8 @@ protected static string CreateMappingSetting(string synonymSetName) => "tokenizer": "group_tokenizer", "filter": [ "lowercase", - "synonyms_filter" + "synonyms_filter", + "kstem" ] }, "highlight_analyzer": { @@ -231,13 +232,19 @@ protected static string CreateMapping(string? inferenceId) => } }, "hash" : { "type" : "keyword" }, + "search_title": { + "type": "text", + "search_analyzer": "synonyms_analyzer", + "fields": { + "completion": { "type": "search_as_you_type" } + } + }, "title": { "type": "text", "search_analyzer": "synonyms_analyzer", "fields": { - "keyword": { - "type": "keyword" - } + "keyword": { "type": "keyword" }, + "completion": { "type": "search_as_you_type", "search_analyzer": "synonyms_analyzer" } {{(!string.IsNullOrWhiteSpace(inferenceId) ? $$""", "semantic_text": {{{InferenceMapping(inferenceId)}}}""" : "")}} } }, @@ -255,34 +262,24 @@ protected static string CreateMapping(string? inferenceId) => "headings": { "type": "text", "search_analyzer": "synonyms_analyzer" + }, + "abstract": { + "type" : "text", + "search_analyzer": "synonyms_analyzer", + "fields" : { + {{(!string.IsNullOrWhiteSpace(inferenceId) ? $"\"semantic_text\": {{{InferenceMapping(inferenceId)}}}" : "")}} + } } - {{(!string.IsNullOrWhiteSpace(inferenceId) ? 
AbstractInferenceMapping(inferenceId) : AbstractMapping())}} } } """; - private static string AbstractMapping() => - """ - , "abstract": { - "type": "text", - "search_analyzer": "synonyms_analyzer" - } - """; - private static string InferenceMapping(string inferenceId) => $""" "type": "semantic_text", "inference_id": "{inferenceId}" """; - private static string AbstractInferenceMapping(string inferenceId) => - // langugage=json - $$""" - , "abstract": { - {{InferenceMapping(inferenceId)}} - } - """; - public void Dispose() { diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs index f64479b03..815e3deb7 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.cs @@ -400,6 +400,12 @@ private void AssignDocumentMetadata(DocumentationDocument doc) doc.BatchIndexDate = _batchIndexDate; } + private void CommonEnrichments(DocumentationDocument doc) + { + var urlComponents = doc.Url.Split('/'); + doc.SearchTitle = $"{doc.Title} - ({string.Join(" ", urlComponents)})"; + } + public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, Cancel ctx) { var file = fileContext.SourceFile; @@ -456,6 +462,7 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, }; AssignDocumentMetadata(doc); + CommonEnrichments(doc); if (_indexStrategy == IngestStrategy.Multiplex) return await _lexicalChannel.TryWrite(doc, ctx) && await _semanticChannel.TryWrite(doc, ctx); @@ -490,6 +497,7 @@ public async ValueTask FinishExportAsync(IDirectoryInfo outputFolder, Canc : string.Empty; doc.Abstract = @abstract; doc.Headings = headings; + CommonEnrichments(doc); // Write to channels following the multiplex or reindex strategy if (_indexStrategy == IngestStrategy.Multiplex) diff --git 
a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs index 4c696c820..20d6ed6f9 100644 --- a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs +++ b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs @@ -2,7 +2,6 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System.Diagnostics; using Microsoft.Extensions.Logging; namespace Elastic.Documentation.Api.Core.Search; diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 9555e54be..0f420ab09 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -19,6 +19,9 @@ internal sealed record DocumentDto [JsonPropertyName("title")] public required string Title { get; init; } + [JsonPropertyName("search_title")] + public required string SearchTitle { get; init; } + [JsonPropertyName("url")] public required string Url { get; init; } @@ -83,54 +86,97 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger CanConnect(Cancel ctx) => (await _client.PingAsync(ctx)).IsValidResponse; + public async Task<(int TotalHits, List Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) => await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx); /// /// Builds the lexical search query for the given search term. 
/// - private static Query BuildLexicalQuery(string searchQuery) => - ((Query)new PrefixQuery(Infer.Field(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } - || new MatchPhrasePrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 9.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } - || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.And, Boost = 5.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.And, Boost = 4.5f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.Or, Boost = 4.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.Or, Boost = 3.0f } - || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } - || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) - && !(Query)new TermQuery { Field = Infer.Field(f => f.Hidden), Value = true }; + private static Query BuildLexicalQuery(string searchQuery) + { + var tokens = searchQuery.Split(" "); + if (tokens is ["datastream" or "datastreams" or "data-stream" or "data-streams"]) + { + // /docs/api/doc/kibana/operation/operation-delete-fleet-epm-packages-pkgname-pkgversion-datastream-assets + // Is the only page that uses "datastream" instead of "data streams" this gives it an N of 1 in the entire corpus + // which is hard to fix through tweaking boosting, should 
update the page to use "data streams" instead + searchQuery = "data streams"; + tokens = ["data", "streams"]; + } + + var query = + (Query)new MultiMatchQuery + { + Query = searchQuery, Operator = Operator.And, Type = TextQueryType.BoolPrefix, + Analyzer = "synonyms_analyzer", + Boost = 2.0f, + Fields = new[] + { + "search_title.completion", + "search_title.completion._2gram", + "search_title.completion._3gram" + } + } + || new MultiMatchQuery + { + Query = searchQuery, Operator = Operator.And, Type = TextQueryType.BestFields, + Analyzer = "synonyms_analyzer", + Boost = 0.2f, + Fields = new[] + { + "stripped_body" + } + }; + // If the search term is a single word, boost the URL match + // This is to ensure that URLs that contain the search term are ranked higher than URLs that don't + // We dampen the boost by wrapping it in a constant score query + // This allows a query for `templates` which is an overloaded term to yield pages that contain `templates` in the URL + if (tokens.Length == 1) + { + query |= new ConstantScoreQuery + { + Filter = new MatchQuery + { + Field = Infer.Field(f => f.Url.Suffix("match")), + Query = searchQuery + }, + Boost = 1 + }; + } + + return new BoostingQuery + { + Positive = query, + NegativeBoost = 0.8, + Negative = new MultiMatchQuery + { + Query = "plugin client integration", Operator = Operator.Or, Fields = new[] { "search_title", "headings", "url.match" } + } + }; + } /// /// Builds the semantic search query for the given search term. 
/// private static Query BuildSemanticQuery(string searchQuery) => - ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } - || new SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f } - ) - && !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), - new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) - && !(Query)new TermQuery { Field = Infer.Field(f => f.Hidden), Value = true }; + (Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f } + || new SemanticQuery("abstract.semantic_text", searchQuery) { Boost = 3.0f }; - /// - /// Normalizes the search query by replacing "dotnet" with "net". - /// - private static string NormalizeSearchQuery(string query) => - query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase); + private static Query BuildFilter() => !(Query)new TermsQuery(Infer.Field(f => f.Url.Suffix("keyword")), + new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"])) + && !(Query)new TermQuery { Field = Infer.Field(f => f.Hidden), Value = true }; - public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) + public async Task<(int TotalHits, List Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, + Cancel ctx = default) { _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize); const string preTag = ""; const string postTag = ""; - var searchQuery = NormalizeSearchQuery(query); + var searchQuery = query; var lexicalSearchRetriever = BuildLexicalQuery(searchQuery); var semanticSearchRetriever = BuildSemanticQuery(searchQuery); @@ -138,52 +184,71 @@ private static string NormalizeSearchQuery(string query) => { var response = await _client.SearchAsync(s => s .Indices(_elasticsearchOptions.IndexName) - .Retriever(r => r - .Rrf(rrf => rrf - 
.Retrievers( - // Lexical/Traditional search retriever - ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), - // Semantic search retriever - ret => ret.Standard(std => std.Query(semanticSearchRetriever)) + .From(Math.Max(pageNumber - 1, 0) * pageSize) + .Size(pageSize) + .PostFilter(BuildFilter()) + .Query(BuildLexicalQuery(query)) + // .Retriever(r => r + // .Rrf(rrf => rrf + // .Filter(BuildFilter()) + // .Retrievers( + // // Lexical/Traditional search retriever + // ret => ret.Standard(std => std.Query(lexicalSearchRetriever)), + // // Semantic search retriever + // ret => ret.Standard(std => std.Query(semanticSearchRetriever)) + // ) + // .RankConstant(60) // Controls how much weight is given to document ranking + // .RankWindowSize(100) + // ) + // ) + .Source(sf => sf + .Filter(f => f + .Includes( + e => e.Type, + e => e.Title, + e => e.SearchTitle, + e => e.Url, + e => e.Description, + e => e.Parents, + e => e.Headings ) - .RankConstant(60) // Controls how much weight is given to document ranking - .RankWindowSize(100) ) ) - .From((pageNumber - 1) * pageSize) - .Size(pageSize) - .Source(sf => sf - .Filter(f => f - .Includes( - e => e.Type, - e => e.Title, - e => e.Url, - e => e.Description, - e => e.Parents, - e => e.Headings - ) - ) - ) - .Highlight(h => h - .RequireFieldMatch(true) - .Fields(f => f - .Add(Infer.Field(d => d.StrippedBody), hf => hf - .FragmentSize(150) - .NumberOfFragments(3) - .NoMatchSize(150) - .BoundaryChars(":.!?\t\n") - .BoundaryScanner(BoundaryScanner.Sentence) - .BoundaryMaxScan(15) - .FragmentOffset(0) - .HighlightQuery(q => q.Match(m => m - .Field(d => d.StrippedBody) - .Query(searchQuery) - .Analyzer("highlight_analyzer") - )) - .PreTags(preTag) - .PostTags(postTag)) - ) - ), ctx); + .Highlight(h => h + .RequireFieldMatch(true) + .Fields(f => f + .Add(Infer.Field(d => d.Title), hf => hf + .FragmentSize(150) + .NumberOfFragments(3) + .NoMatchSize(150) + .BoundaryChars(":.!?\t\n") + 
.BoundaryScanner(BoundaryScanner.Sentence) + .BoundaryMaxScan(15) + .FragmentOffset(0) + .HighlightQuery(q => q.Match(m => m + .Field(d => d.Title) + .Query(searchQuery) + .Analyzer("highlight_analyzer") + )) + .PreTags(preTag) + .PostTags(postTag)) + .Add(Infer.Field(d => d.StrippedBody), hf => hf + .FragmentSize(150) + .NumberOfFragments(3) + .NoMatchSize(150) + .BoundaryChars(":.!?\t\n") + .BoundaryScanner(BoundaryScanner.Sentence) + .BoundaryMaxScan(15) + .FragmentOffset(0) + .HighlightQuery(q => q.Match(m => m + .Field(d => d.StrippedBody) + .Query(searchQuery) + .Analyzer("highlight_analyzer") + )) + .PreTags(preTag) + .PostTags(postTag)) + ) + ), ctx); if (!response.IsValidResponse) { @@ -245,14 +310,14 @@ private static (int TotalHits, List Results) ProcessSearchResp /// public async Task ExplainDocumentAsync(string query, string documentUrl, Cancel ctx = default) { - var searchQuery = NormalizeSearchQuery(query); + var searchQuery = query; var lexicalQuery = BuildLexicalQuery(searchQuery); - var semanticQuery = BuildSemanticQuery(searchQuery); + //var semanticQuery = BuildSemanticQuery(searchQuery); // Combine queries with bool should to match RRF behavior var combinedQuery = (Query)new BoolQuery { - Should = [lexicalQuery, semanticQuery], + Should = [lexicalQuery], MinimumShouldMatch = 1 }; @@ -268,6 +333,7 @@ public async Task ExplainDocumentAsync(string query, string docum { return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = false, Explanation = $"Document with URL '{documentUrl}' not found in index" @@ -284,6 +350,7 @@ public async Task ExplainDocumentAsync(string query, string docum { return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = true, Matched = false, @@ -294,6 +361,7 @@ public async Task ExplainDocumentAsync(string query, string docum return new ExplainResult { DocumentUrl = documentUrl, + SearchTitle = getDocResponse.Documents.First().SearchTitle, Found = true, Matched = 
explainResponse.Matched, Score = explainResponse.Explanation?.Value ?? 0, @@ -305,6 +373,7 @@ public async Task ExplainDocumentAsync(string query, string docum _logger.LogError(ex, "Error explaining document '{Url}' for query '{Query}'", documentUrl, query); return new ExplainResult { + SearchTitle = "N/A", DocumentUrl = documentUrl, Found = false, Explanation = $"Exception during explain: {ex.Message}" @@ -351,6 +420,7 @@ private static string FormatExplanation(Elastic.Clients.Elasticsearch.Core.Expla { var emptyResult = new ExplainResult { + SearchTitle = "N/A", DocumentUrl = "N/A", Found = false, Explanation = "No search results returned" @@ -371,6 +441,7 @@ private static string FormatExplanation(Elastic.Clients.Elasticsearch.Core.Expla /// public sealed record ExplainResult { + public required string SearchTitle { get; init; } public required string DocumentUrl { get; init; } public bool Found { get; init; } public bool Matched { get; init; } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs index 24afadb08..b1bbfe64d 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Aws/LocalParameterProvider.cs @@ -2,10 +2,27 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using Microsoft.Extensions.Configuration; + namespace Elastic.Documentation.Api.Infrastructure.Aws; public class LocalParameterProvider : IParameterProvider { + private readonly string? _elasticUrlFromSecret; + private readonly string? 
_elasticApiKeyFromSecret; + + public LocalParameterProvider() + { + // Build a new ConfigurationBuilder to read user secrets + var configBuilder = new ConfigurationBuilder(); + _ = configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + _elasticUrlFromSecret = userSecretsConfig["Parameters:DocumentationElasticUrl"]; + _elasticApiKeyFromSecret = userSecretsConfig["Parameters:DocumentationElasticApiKey"]; + + } + public async Task GetParam(string name, bool withDecryption = true, Cancel ctx = default) { switch (name) @@ -24,11 +41,11 @@ public async Task GetParam(string name, bool withDecryption = true, Canc } case "docs-elasticsearch-url": { - return GetEnv("DOCUMENTATION_ELASTIC_URL"); + return GetEnv("DOCUMENTATION_ELASTIC_URL", _elasticUrlFromSecret); } case "docs-elasticsearch-apikey": { - return GetEnv("DOCUMENTATION_ELASTIC_APIKEY"); + return GetEnv("DOCUMENTATION_ELASTIC_APIKEY", _elasticApiKeyFromSecret); } case "docs-kibana-url": { diff --git a/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj b/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj index b209a46ba..061b59867 100644 --- a/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj +++ b/src/api/Elastic.Documentation.Api.Lambda/Elastic.Documentation.Api.Lambda.csproj @@ -21,7 +21,6 @@ - diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs new file mode 100644 index 000000000..8438bd58f --- /dev/null +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchBootstrapFixture.cs @@ -0,0 +1,283 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + +using Aspire.Hosting.ApplicationModel; +using Aspire.Hosting.Testing; +using Documentation.Builder.Diagnostics.Console; +using Elastic.Documentation.Aspire; +using Elastic.Documentation.Configuration; +using Elastic.Ingest.Elasticsearch; +using Elastic.Markdown.Exporters.Elasticsearch; +using Elastic.Transport; +using Elastic.Transport.Products.Elasticsearch; +using FluentAssertions; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; + +namespace Elastic.Assembler.IntegrationTests.Search; + +[CollectionDefinition(Collection)] +public class SearchBootstrapFixture(DocumentationFixture fixture) : IAsyncLifetime +{ + public const string Collection = "Search"; + public HttpClient HttpClient { get; private set; } = null!; + public bool Connected { get; private set; } + + /// + /// Initializes the test by ensuring AssemblerServe (which hosts the API) is healthy and Elasticsearch is indexed. + /// Checks if the remote Elasticsearch already has up-to-date data to avoid unnecessary indexing. + /// + public async ValueTask InitializeAsync() + { + try + { + // Wait for AssemblerServe to be ready (it hosts the embedded Lambda API) + Console.WriteLine("Waiting for AssemblerServe (with embedded API) to become healthy..."); + await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceHealthyAsync(ResourceNames.AssemblerServe, cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(2), TestContext.Current.CancellationToken); + + Console.WriteLine("AssemblerServe is healthy. 
Creating HTTP client..."); + + // Get the HTTP client for AssemblerServe which includes the API endpoints + HttpClient = fixture.DistributedApplication.CreateHttpClient(ResourceNames.AssemblerServe, "http"); + HttpClient.Should().NotBeNull("Should be able to create HTTP client for AssemblerServe"); + + // Check if Elasticsearch already has up-to-date data + var indexingNeeded = await IsIndexingNeeded(); + + if (!Connected) + { + Console.WriteLine("Can not connect to Elasticsearch. Skipping indexing."); + return; + } + + if (!indexingNeeded) + { + Console.WriteLine("Elasticsearch already has up-to-date data. Skipping indexing."); + return; + } + + Console.WriteLine("Elasticsearch needs indexing. Manually starting indexer..."); + + // The indexer always has WithExplicitStart(), so we must manually start it + // Get the ResourceLoggerService to send the start command + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Get the resource notification service to find the resource + fixture.DistributedApplication.Services + .GetRequiredService(); + + // Wait for the resource to be available + var resourceEvent = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ResourceNames.ElasticsearchIngest, _ => true, TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); + + // Get the resource instance + var resource = resourceEvent.Resource; + + // Execute the start command using ResourceCommandAnnotation + var startCommand = resource.Annotations.OfType() + .FirstOrDefault(a => a.Name == "resource-start"); + + if (startCommand != null) + { + Console.WriteLine($"Executing start command for {ResourceNames.ElasticsearchIngest}..."); + + // Create ExecuteCommandContext for the start command + var commandContext = new ExecuteCommandContext + { + ResourceName = resourceEvent.ResourceId, + ServiceProvider = fixture.DistributedApplication.Services, + CancellationToken = 
TestContext.Current.CancellationToken + }; + + await startCommand.ExecuteCommand(commandContext); + Console.WriteLine($"Start command executed for {ResourceNames.ElasticsearchIngest}"); + } + else + { + throw new Exception($"Could not find start command for {ResourceNames.ElasticsearchIngest}"); + } + + Console.WriteLine("Waiting for indexer to complete..."); + + // Wait for the indexer to complete + _ = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(ResourceNames.ElasticsearchIngest, KnownResourceStates.TerminalStates, + cancellationToken: TestContext.Current.CancellationToken) + .WaitAsync(TimeSpan.FromMinutes(10), TestContext.Current.CancellationToken); + + Console.WriteLine("Elasticsearch indexer reached terminal state. Validating exit code..."); + + // Validate the indexer completed successfully + await ValidateResourceExitCode(ResourceNames.ElasticsearchIngest); + + Console.WriteLine("Elasticsearch indexing completed successfully. Tests can now run."); + } + catch (Exception e) + { + Console.WriteLine($"Failed to initialize test: {e.Message}"); + Console.WriteLine(string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse())); + throw; + } + } + + /// + /// Checks if indexing is needed by comparing the channel hash in Elasticsearch + /// with the current semantic exporter channel hash. + /// Uses the same pattern as ElasticsearchMarkdownExporter. 
+ /// + private async ValueTask IsIndexingNeeded() + { + try + { + // Get Elasticsearch configuration from Aspire + var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); + + if (string.IsNullOrEmpty(elasticsearchUrl)) + { + Console.WriteLine("No Elasticsearch URL configured, indexing will be skipped."); + Connected = false; + return false; + } + + Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); + + // Create Elasticsearch endpoint configuration + var endpoint = new ElasticsearchEndpoint + { + Uri = new Uri(elasticsearchUrl), + ApiKey = apiKey, + Username = username, + Password = password + }; + + // Create transport configuration (similar to ElasticsearchMarkdownExporter) + var configuration = new ElasticsearchConfiguration(endpoint.Uri) + { + Authentication = endpoint.ApiKey is { } eApiKey + ? new ApiKey(eApiKey) + : endpoint is { Username: { } eUsername, Password: { } ePassword } + ? new BasicAuthentication(eUsername, ePassword) + : null, + EnableHttpCompression = true + }; + + var transport = new DistributedTransport(configuration); + Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; + + // Create a logger factory and diagnostics collector + var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); + var collector = new ConsoleDiagnosticsCollector(loggerFactory); + + // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) + using var semanticExporter = new ElasticsearchSemanticExporter( + loggerFactory, + collector, + endpoint, + "dev", // index namespace + transport + ); + + // Get the current hash from Elasticsearch index template + var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? 
string.Empty; + + // Get the expected channel hash from the semantic exporter + await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); + var expectedSemanticHash = semanticExporter.Channel.ChannelHash; + + Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); + Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); + + // If hashes match, no indexing needed + if (!string.IsNullOrEmpty(currentSemanticHash) && currentSemanticHash == expectedSemanticHash) + { + Console.WriteLine("Semantic channel hashes match. Skipping indexing."); + return false; + } + + Console.WriteLine("Semantic channel hashes do not match or remote hash is empty. Indexing needed."); + return true; + } + catch (Exception ex) + { + Console.WriteLine($"Error checking Elasticsearch state: {ex.Message}. Will proceed with indexing."); + return true; // If we can't check, safer to index + } + } + + private async ValueTask ValidateResourceExitCode(string resourceName) + { + var eventResource = await fixture.DistributedApplication.ResourceNotifications + .WaitForResourceAsync(resourceName, _ => true); + var id = eventResource.ResourceId; + + if (!fixture.DistributedApplication.ResourceNotifications.TryGetCurrentState(id, out var state)) + throw new Exception($"Could not find {resourceName} in the current state"); + + if (state.Snapshot.ExitCode is not 0) + { + var recentLogs = string.Join(Environment.NewLine, + fixture.InMemoryLogger.RecordedLogs.Reverse().Take(100).Reverse()); + throw new Exception( + $"Exit code should be 0 for {resourceName}, but was {state.Snapshot.ExitCode}. Recent logs:{Environment.NewLine}{recentLogs}"); + } + + Console.WriteLine($"{resourceName} completed with exit code 0"); + } + + /// + /// Gets Elasticsearch configuration from Aspire parameters and environment. + /// Manually reads user secrets from the aspire project, then falls back to environment variables. 
+ /// + private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() + { + // Manually read user secrets from the aspire project + // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 + var configBuilder = new ConfigurationBuilder(); + configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); + var userSecretsConfig = configBuilder.Build(); + + // Get URL - try user secrets first, then Aspire configuration, then environment + var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); + + // Get API Key - try user secrets first, then Aspire configuration, then environment + var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); + + // Get password for local Elasticsearch (when using --start-elasticsearch) + var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); + + // Get username (defaults to "elastic") + var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") + ?? 
"elastic"; + + Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); + + return (url, apiKey, password, username); + } + + public ValueTask DisposeAsync() + { + HttpClient?.Dispose(); + + // Only dump logs if test failed + if (TestContext.Current.TestState?.Result is not TestResult.Passed) + { + foreach (var log in fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse()) + Console.WriteLine(log.Message); + } + + GC.SuppressFinalize(this); + return default; + } +} diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs index 2dae77bbb..2b45d165d 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs +++ b/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchTestBase.cs @@ -2,287 +2,8 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
// See the LICENSE file in the project root for more information -using Aspire.Hosting.ApplicationModel; -using Aspire.Hosting.Testing; -using Documentation.Builder.Diagnostics.Console; -using Elastic.Documentation.Configuration; -using Elastic.Ingest.Elasticsearch; -using Elastic.Markdown.Exporters.Elasticsearch; -using Elastic.Transport; -using Elastic.Transport.Products.Elasticsearch; -using FluentAssertions; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; -using static Elastic.Documentation.Aspire.ResourceNames; - namespace Elastic.Assembler.IntegrationTests.Search; - -[CollectionDefinition(Collection)] -public class SearchBootstrapFixture(DocumentationFixture fixture) : IAsyncLifetime -{ - public const string Collection = "Search"; - public HttpClient HttpClient { get; private set; } = null!; - public bool Connected { get; private set; } - - /// - /// Initializes the test by ensuring AssemblerServe (which hosts the API) is healthy and Elasticsearch is indexed. - /// Checks if the remote Elasticsearch already has up-to-date data to avoid unnecessary indexing. - /// - public async ValueTask InitializeAsync() - { - try - { - // Wait for AssemblerServe to be ready (it hosts the embedded Lambda API) - Console.WriteLine("Waiting for AssemblerServe (with embedded API) to become healthy..."); - await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceHealthyAsync(AssemblerServe, cancellationToken: TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(2), TestContext.Current.CancellationToken); - - Console.WriteLine("AssemblerServe is healthy. 
Creating HTTP client..."); - - // Get the HTTP client for AssemblerServe which includes the API endpoints - HttpClient = fixture.DistributedApplication.CreateHttpClient(AssemblerServe, "http"); - HttpClient.Should().NotBeNull("Should be able to create HTTP client for AssemblerServe"); - - // Check if Elasticsearch already has up-to-date data - var indexingNeeded = await IsIndexingNeeded(); - - if (!Connected) - { - Console.WriteLine("Can not connect to Elasticsearch. Skipping indexing."); - return; - } - - if (!indexingNeeded) - { - Console.WriteLine("Elasticsearch already has up-to-date data. Skipping indexing."); - return; - } - - Console.WriteLine("Elasticsearch needs indexing. Manually starting indexer..."); - - // The indexer always has WithExplicitStart(), so we must manually start it - // Get the ResourceLoggerService to send the start command - fixture.DistributedApplication.Services - .GetRequiredService(); - - // Get the resource notification service to find the resource - fixture.DistributedApplication.Services - .GetRequiredService(); - - // Wait for the resource to be available - var resourceEvent = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(ElasticsearchIngest, _ => true, TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(1), TestContext.Current.CancellationToken); - - // Get the resource instance - var resource = resourceEvent.Resource; - - // Execute the start command using ResourceCommandAnnotation - var startCommand = resource.Annotations.OfType() - .FirstOrDefault(a => a.Name == "resource-start"); - - if (startCommand != null) - { - Console.WriteLine($"Executing start command for {ElasticsearchIngest}..."); - - // Create ExecuteCommandContext for the start command - var commandContext = new ExecuteCommandContext - { - ResourceName = resourceEvent.ResourceId, - ServiceProvider = fixture.DistributedApplication.Services, - CancellationToken = TestContext.Current.CancellationToken - }; - - 
await startCommand.ExecuteCommand(commandContext); - Console.WriteLine($"Start command executed for {ElasticsearchIngest}"); - } - else - { - throw new Exception($"Could not find start command for {ElasticsearchIngest}"); - } - - Console.WriteLine("Waiting for indexer to complete..."); - - // Wait for the indexer to complete - _ = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(ElasticsearchIngest, KnownResourceStates.TerminalStates, - cancellationToken: TestContext.Current.CancellationToken) - .WaitAsync(TimeSpan.FromMinutes(10), TestContext.Current.CancellationToken); - - Console.WriteLine("Elasticsearch indexer reached terminal state. Validating exit code..."); - - // Validate the indexer completed successfully - await ValidateResourceExitCode(ElasticsearchIngest); - - Console.WriteLine("Elasticsearch indexing completed successfully. Tests can now run."); - } - catch (Exception e) - { - Console.WriteLine($"Failed to initialize test: {e.Message}"); - Console.WriteLine(string.Join(Environment.NewLine, - fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse())); - throw; - } - } - - /// - /// Checks if indexing is needed by comparing the channel hash in Elasticsearch - /// with the current semantic exporter channel hash. - /// Uses the same pattern as ElasticsearchMarkdownExporter. 
- /// - private async ValueTask IsIndexingNeeded() - { - try - { - // Get Elasticsearch configuration from Aspire - var (elasticsearchUrl, apiKey, password, username) = GetElasticsearchConfiguration(); - - if (string.IsNullOrEmpty(elasticsearchUrl)) - { - Console.WriteLine("No Elasticsearch URL configured, indexing will be performed."); - Connected = false; - return false; - } - - Console.WriteLine($"Checking remote Elasticsearch at {elasticsearchUrl} for existing data..."); - - // Create Elasticsearch endpoint configuration - var endpoint = new ElasticsearchEndpoint - { - Uri = new Uri(elasticsearchUrl), - ApiKey = apiKey, - Username = username, - Password = password - }; - - // Create transport configuration (similar to ElasticsearchMarkdownExporter) - var configuration = new ElasticsearchConfiguration(endpoint.Uri) - { - Authentication = endpoint.ApiKey is { } eApiKey - ? new ApiKey(eApiKey) - : endpoint is { Username: { } eUsername, Password: { } ePassword } - ? new BasicAuthentication(eUsername, ePassword) - : null, - EnableHttpCompression = true - }; - - var transport = new DistributedTransport(configuration); - Connected = (await transport.HeadAsync("/", TestContext.Current.CancellationToken)).ApiCallDetails.HasSuccessfulStatusCode; - - // Create a logger factory and diagnostics collector - var loggerFactory = fixture.DistributedApplication.Services.GetRequiredService(); - var collector = new ConsoleDiagnosticsCollector(loggerFactory); - - // Create semantic exporter to check channel hash (index namespace is 'dev' for tests) - using var semanticExporter = new ElasticsearchSemanticExporter( - loggerFactory, - collector, - endpoint, - "dev", // index namespace - transport - ); - - // Get the current hash from Elasticsearch index template - var currentSemanticHash = await semanticExporter.Channel.GetIndexTemplateHashAsync(TestContext.Current.CancellationToken) ?? 
string.Empty; - - // Get the expected channel hash from the semantic exporter - await semanticExporter.Channel.BootstrapElasticsearchAsync(BootstrapMethod.Silent, ctx: TestContext.Current.CancellationToken); - var expectedSemanticHash = semanticExporter.Channel.ChannelHash; - - Console.WriteLine($"Elasticsearch semantic hash: '{currentSemanticHash}'"); - Console.WriteLine($"Expected semantic hash: '{expectedSemanticHash}'"); - - // If hashes match, no indexing needed - if (!string.IsNullOrEmpty(currentSemanticHash) && currentSemanticHash == expectedSemanticHash) - { - Console.WriteLine("Semantic channel hashes match. Skipping indexing."); - return false; - } - - Console.WriteLine("Semantic channel hashes do not match or remote hash is empty. Indexing needed."); - return true; - } - catch (Exception ex) - { - Console.WriteLine($"Error checking Elasticsearch state: {ex.Message}. Will proceed with indexing."); - return true; // If we can't check, safer to index - } - } - - private async ValueTask ValidateResourceExitCode(string resourceName) - { - var eventResource = await fixture.DistributedApplication.ResourceNotifications - .WaitForResourceAsync(resourceName, _ => true); - var id = eventResource.ResourceId; - - if (!fixture.DistributedApplication.ResourceNotifications.TryGetCurrentState(id, out var state)) - throw new Exception($"Could not find {resourceName} in the current state"); - - if (state.Snapshot.ExitCode is not 0) - { - var recentLogs = string.Join(Environment.NewLine, - fixture.InMemoryLogger.RecordedLogs.Reverse().Take(100).Reverse()); - throw new Exception( - $"Exit code should be 0 for {resourceName}, but was {state.Snapshot.ExitCode}. Recent logs:{Environment.NewLine}{recentLogs}"); - } - - Console.WriteLine($"{resourceName} completed with exit code 0"); - } - - /// - /// Gets Elasticsearch configuration from Aspire parameters and environment. - /// Manually reads user secrets from the aspire project, then falls back to environment variables. 
- /// - private (string? Url, string? ApiKey, string? Password, string? Username) GetElasticsearchConfiguration() - { - // Manually read user secrets from the aspire project - // UserSecretsId from aspire.csproj: 72f50f33-6fb9-4d08-bff3-39568fe370b3 - var configBuilder = new ConfigurationBuilder(); - configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); - var userSecretsConfig = configBuilder.Build(); - - // Get URL - try user secrets first, then Aspire configuration, then environment - var url = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); - - // Get API Key - try user secrets first, then Aspire configuration, then environment - var apiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? fixture.DistributedApplication.Services.GetService()?["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - - // Get password for local Elasticsearch (when using --start-elasticsearch) - var password = userSecretsConfig["Parameters:DocumentationElasticPassword"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_PASSWORD"); - - // Get username (defaults to "elastic") - var username = userSecretsConfig["Parameters:DocumentationElasticUsername"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_USERNAME") - ?? 
"elastic"; - - Console.WriteLine($"Elasticsearch configuration retrieved: URL={url != null}, ApiKey={apiKey != null}, Password={password != null}"); - - return (url, apiKey, password, username); - } - - public ValueTask DisposeAsync() - { - HttpClient?.Dispose(); - - // Only dump logs if test failed - if (TestContext.Current.TestState?.Result is not TestResult.Passed) - { - foreach (var log in fixture.InMemoryLogger.RecordedLogs.Reverse().Take(50).Reverse()) - Console.WriteLine(log.Message); - } - - GC.SuppressFinalize(this); - return default; - } -} /// /// Base class for search integration tests that handles initialization /// and conditional Elasticsearch indexing based on hash comparison. diff --git a/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj new file mode 100644 index 000000000..0691c1282 --- /dev/null +++ b/tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj @@ -0,0 +1,14 @@ + + + + net10.0 + enable + enable + + + + + + + + diff --git a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs similarity index 69% rename from tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs rename to tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs index 4247b459d..cd0007ff1 100644 --- a/tests-integration/Elastic.Assembler.IntegrationTests/Search/SearchRelevanceTests.cs +++ b/tests-integration/Search.IntegrationTests/SearchRelevanceTests.cs @@ -4,52 +4,53 @@ using Elastic.Documentation.Api.Infrastructure.Adapters.Search; using Elastic.Documentation.Api.Infrastructure.Aws; -using Elastic.Documentation.Configuration; using FluentAssertions; using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; +using 
Microsoft.Extensions.Logging.Abstractions; -namespace Elastic.Assembler.IntegrationTests.Search; +namespace Search.IntegrationTests; /// /// Integration tests for search relevance that use ElasticsearchGateway directly /// to provide detailed explanations of search results using Elasticsearch's _explain API. /// These tests help understand and improve search ranking by showing detailed scoring breakdowns. /// -[Collection(SearchBootstrapFixture.Collection)] -public class SearchRelevanceTests(SearchBootstrapFixture searchFixture, DocumentationFixture documentationFixture, ITestOutputHelper output) : SearchTestBase +public class SearchRelevanceTests(ITestOutputHelper output) { /// /// Theory data for search queries mapped to expected first hit URLs. /// Same as SearchIntegrationTests but with detailed explain output on failures. /// - public static TheoryData SearchQueryTestCases => new() + public static TheoryData SearchQueryTestCases => new() { //TODO these results reflect today's result, we still have some work to do to improve the relevance of the search results // Elasticsearch specific queries - { "elasticsearch getting started", "/docs/reference/elasticsearch/clients/java/getting-started" }, - { "apm", "/docs/reference/apm/observability/apm" }, - { "kibana dashboard", "/docs/reference/beats/auditbeat/configuration-dashboards" }, - - // .NET specific queries (testing dotnet -> net replacement) - { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet/using-net-client" }, - { ".net apm agent", "/docs/reference/apm/agents/dotnet" }, - - // General queries - { "machine learning", "/docs/reference/machine-learning" }, - { "ingest pipeline", "/docs/reference/beats/metricbeat/configuring-ingest-node" }, + { "elasticsearch get started", "/docs/solutions/search/get-started", null }, + { "elasticsearch getting started", "/docs/solutions/search/get-started", null }, + { "elastic common schema", "/docs/reference/ecs", null }, + { "ecs", "/docs/reference/ecs", 
null }, + { "c# client", "/docs/reference/elasticsearch/clients/dotnet", null }, + { "dotnet client", "/docs/reference/elasticsearch/clients/dotnet", null }, + { "runscript", "/docs/api/doc/kibana/operation/operation-runscriptaction", [ "/docs/solutions/security/endpoint-response-actions" ] }, + { "data-streams", "/docs/manage-data/data-store/data-streams", null }, + { "datastream", "/docs/manage-data/data-store/data-streams", null }, + { "data stream", "/docs/manage-data/data-store/data-streams", null }, + { "saml sso", "/docs/deploy-manage/users-roles/cloud-organization/register-elastic-cloud-saml-in-okta", ["/docs/deploy-manage/users-roles/cloud-organization/configure-saml-authentication"] }, + { "templates", "/docs/manage-data/data-store/templates", null}, + { "query dsl", "/docs/explore-analyze/query-filter/languages/querydsl", null}, + { "querydsl", "/docs/explore-analyze/query-filter/languages/querydsl", null} }; [Theory] [MemberData(nameof(SearchQueryTestCases))] - public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl) + public async Task SearchReturnsExpectedFirstResultWithExplain(string query, string expectedFirstResultUrl, string[]? 
additionalExpectedUrls) { - Assert.SkipUnless(searchFixture.Connected, "Elasticsearch is not connected"); - // Arrange - Create ElasticsearchGateway directly var gateway = CreateElasticsearchGateway(); + Assert.SkipUnless(gateway is not null, "Elasticsearch is not connected"); + var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); // Act - Perform the search var (totalHits, results) = await gateway.HybridSearchWithRrfAsync(query, 1, 5, TestContext.Current.CancellationToken); @@ -77,6 +78,7 @@ public async Task SearchReturnsExpectedFirstResultWithExplain(string query, stri // Output the actual top result explanation output.WriteLine("═══════════════════════════════════════════════════════════════"); output.WriteLine($"ACTUAL TOP RESULT: {topResultExplain.DocumentUrl}"); + output.WriteLine($"Search Title: {topResultExplain.SearchTitle}"); output.WriteLine($"Score: {topResultExplain.Score:F4}"); output.WriteLine($"Matched: {topResultExplain.Matched}"); output.WriteLine("───────────────────────────────────────────────────────────────"); @@ -86,6 +88,7 @@ public async Task SearchReturnsExpectedFirstResultWithExplain(string query, stri // Output the expected result explanation output.WriteLine("═══════════════════════════════════════════════════════════════"); output.WriteLine($"EXPECTED RESULT: {expectedResultExplain.DocumentUrl}"); + output.WriteLine($"Search Title: {expectedResultExplain.SearchTitle}"); output.WriteLine($"Score: {expectedResultExplain.Score:F4}"); output.WriteLine($"Matched: {expectedResultExplain.Matched}"); output.WriteLine("───────────────────────────────────────────────────────────────"); @@ -118,6 +121,32 @@ public async Task SearchReturnsExpectedFirstResultWithExplain(string query, stri output.WriteLine($"✅ First result matches expected: {actualFirstResultUrl}"); output.WriteLine($" Score: {results.First().Score:F4}"); } + + // Check for additional 
expected URLs if provided + if (additionalExpectedUrls?.Length > 0) + { + output.WriteLine($"\nChecking for {additionalExpectedUrls.Length} additional expected URLs on first page..."); + var resultUrls = results.Select(r => r.Url).ToList(); + + foreach (var expectedUrl in additionalExpectedUrls) + { + if (resultUrls.Contains(expectedUrl)) + { + var position = resultUrls.IndexOf(expectedUrl) + 1; + output.WriteLine($"✅ Found expected URL at position {position}: {expectedUrl}"); + } + else + { + output.WriteLine($"❌ Expected URL not found on first page: {expectedUrl}"); + output.WriteLine($" First page results ({results.Count}):"); + for (var i = 0; i < results.Count; i++) + { + output.WriteLine($" {i + 1}. {results[i].Url} (score: {results[i].Score:F4})"); + } + resultUrls.Should().Contain(expectedUrl, $"Expected URL '{expectedUrl}' should be present on the first page of results for query '{query}'"); + } + } + } } [Fact] @@ -125,6 +154,10 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() { // Arrange var gateway = CreateElasticsearchGateway(); + Assert.SkipUnless(gateway is not null, "Elasticsearch is not connected"); + var canConnect = await gateway.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); + const string query = "elasticsearch getting started"; const string expectedUrl = "/docs/reference/elasticsearch/clients/java/getting-started"; @@ -158,32 +191,30 @@ public async Task ExplainTopResultAndExpectedAsyncReturnsDetailedScoring() /// /// Creates an ElasticsearchGateway instance using configuration from the distributed application. /// - private ElasticsearchGateway CreateElasticsearchGateway() + private ElasticsearchGateway? 
CreateElasticsearchGateway() { - var configuration = documentationFixture.DistributedApplication.Services.GetRequiredService(); - var loggerFactory = documentationFixture.DistributedApplication.Services.GetRequiredService(); - // Build a new ConfigurationBuilder to read user secrets var configBuilder = new ConfigurationBuilder(); configBuilder.AddUserSecrets("72f50f33-6fb9-4d08-bff3-39568fe370b3"); var userSecretsConfig = configBuilder.Build(); // Get Elasticsearch configuration with fallback chain: user secrets → configuration → environment - var elasticsearchUrl = userSecretsConfig["Parameters:DocumentationElasticUrl"] - ?? configuration["Parameters:DocumentationElasticUrl"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL") - ?? throw new InvalidOperationException("Elasticsearch URL not configured"); + var elasticsearchUrl = + userSecretsConfig["Parameters:DocumentationElasticUrl"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_URL"); + + var elasticsearchApiKey = + userSecretsConfig["Parameters:DocumentationElasticApiKey"] + ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY"); - var elasticsearchApiKey = userSecretsConfig["Parameters:DocumentationElasticApiKey"] - ?? configuration["Parameters:DocumentationElasticApiKey"] - ?? Environment.GetEnvironmentVariable("DOCUMENTATION_ELASTIC_APIKEY") - ?? throw new InvalidOperationException("Elasticsearch API key not configured"); + if (elasticsearchUrl is null or "" || elasticsearchApiKey is null or "") + return null; // Create a test parameter provider with the configuration values var parameterProvider = new TestParameterProvider(elasticsearchUrl, elasticsearchApiKey, "semantic-docs-dev-latest"); var options = new ElasticsearchOptions(parameterProvider); - return new ElasticsearchGateway(options, loggerFactory.CreateLogger()); + return new ElasticsearchGateway(options, NullLogger.Instance); } ///