diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs index 3b8e42a23..a0942767f 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchExporter.cs @@ -62,7 +62,6 @@ DistributedTransport transport SearchInferenceId = endpoint.NoElasticInferenceService ? null : ".elser-2-elastic" }); - public abstract class ElasticsearchExporter : IDisposable where TChannelOptions : CatalogIndexChannelOptionsBase where TChannel : CatalogIndexChannel @@ -149,14 +148,14 @@ protected static string CreateMappingSetting(string synonymSetName) => "analysis": { "analyzer": { "synonyms_analyzer": { - "tokenizer": "whitespace", + "tokenizer": "group_tokenizer", "filter": [ "lowercase", "synonyms_filter" ] }, "highlight_analyzer": { - "tokenizer": "standard", + "tokenizer": "group_tokenizer", "filter": [ "lowercase", "english_stop" @@ -176,7 +175,11 @@ protected static string CreateMappingSetting(string synonymSetName) => } }, "tokenizer": { - "path_tokenizer": { + "group_tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ "whitespace", ",", ";", "?", "!", "(", ")", "&", "'", "\"", "/", "[", "]", "{", "}" ] + }, + "path_tokenizer": { "type": "path_hierarchy", "delimiter": "/" } @@ -243,9 +246,13 @@ protected static string CreateMapping(string? inferenceId) => }, "stripped_body": { "type": "text", - "search_analyzer": "highlight_analyzer", + "search_analyzer": "synonyms_analyzer", "term_vector": "with_positions_offsets" - } + }, + "headings": { + "type": "text", + "search_analyzer": "synonyms_analyzer" + } {{(!string.IsNullOrWhiteSpace(inferenceId) ? AbstractInferenceMapping(inferenceId) : AbstractMapping())}} } } @@ -253,7 +260,10 @@ protected static string CreateMapping(string? 
inferenceId) => private static string AbstractMapping() => """ - , "abstract": { "type": "text" } + , "abstract": { + "type": "text", + "search_analyzer": "synonyms_analyzer" + } """; private static string InferenceMapping(string inferenceId) => @@ -278,5 +288,4 @@ public void Dispose() GC.SuppressFinalize(this); } - } diff --git a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs index 49b23cb9f..7d5d4e6de 100644 --- a/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs +++ b/src/api/Elastic.Documentation.Api.Core/Search/SearchUsecase.cs @@ -72,6 +72,7 @@ public record SearchResultItem public required string Title { get; init; } public required string Description { get; init; } public required SearchResultItemParent[] Parents { get; init; } + public string[]? Headings { get; init; } public float Score { get; init; } public string? HighlightedBody { get; init; } } diff --git a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs index 615ac9567..d2bbaaf33 100644 --- a/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs +++ b/src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs @@ -37,6 +37,9 @@ internal sealed record DocumentDto [JsonPropertyName("url_segment_count")] public int UrlSegmentCount { get; init; } + [JsonPropertyName("headings")] + public string[] Headings { get; init; } = []; + [JsonPropertyName("parents")] public ParentDocumentDto[] Parents { get; init; } = []; @@ -88,10 +91,15 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true } + || new MatchPhrasePrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 9.0f } || new MatchQuery(Infer.Field(f => 
f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f } || new MatchBoolPrefixQuery(Infer.Field(f => f.Title), searchQuery) { Boost = 6.0f } - || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Boost = 4.0f } - || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Boost = 3.0f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.And, Boost = 5.0f } + || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.And, Boost = 4.5f } + || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.And, Boost = 4.5f } + || new MatchQuery(Infer.Field(f => f.Abstract), searchQuery) { Operator = Operator.Or, Boost = 4.0f } + || new MatchQuery(Infer.Field(f => f.StrippedBody), searchQuery) { Operator = Operator.Or, Boost = 3.0f } + || new MatchQuery(Infer.Field(f => f.Headings), searchQuery) { Operator = Operator.Or, Boost = 3.0f } || new MatchQuery(Infer.Field(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f } || new MatchQuery(Infer.Field(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f } ) @@ -129,7 +137,8 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger e.Title, e => e.Url, e => e.Description, - e => e.Parents + e => e.Parents, + e => e.Headings ) ) ) @@ -193,6 +202,7 @@ private static (int TotalHits, List Results) ProcessSearchResp Url = doc.Url, Title = doc.Title, Description = doc.Description ?? string.Empty, + Headings = doc.Headings, Parents = doc.Parents.Select(parent => new SearchResultItemParent { Title = parent.Title,