From f7c2a982bd29d1d07507287401eb6539d821fb8e Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Tue, 6 May 2025 12:29:04 -0700 Subject: [PATCH 1/4] Text field block loader properly handles null values from delegate (#127525) (cherry picked from commit 0df9d1c4c286e6569e1c5f168124dc605af1dd0d) # Conflicts: # docs/reference/elasticsearch/mapping-reference/keyword.md # docs/reference/elasticsearch/mapping-reference/text.md --- docs/reference/mapping/types/keyword.asciidoc | 37 +++++++++++++ docs/reference/mapping/types/text.asciidoc | 53 ++++++++++++------- .../index/mapper/TextFieldMapper.java | 7 +++ .../TextFieldBlockLoaderTests.java | 15 +----- 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 2e397a74a0013..30c181f12a4be 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -293,6 +293,43 @@ Will become: ---- // TEST[s/^/{"_source":/ s/\n$/}/] +If `null_value` is configured, `null` values are replaced with the `null_value` in synthetic source. +For example: +[source,console,id=synthetic-source-keyword-example-null-values] +---- +PUT idx +{ + "settings": { + "index": { + "mapping": { + "source": { + "mode": "synthetic" + } + } + } + }, + "mappings": { + "properties": { + "kwd": { "type": "keyword", "null_value": "NA" } + } + } +} +PUT idx/_doc/1 +{ + "kwd": ["foo", null, "bar"] +} +---- +// TEST[s/$/\nGET idx\/_doc\/1?filter_path=_source\n/] + +Will become: + +[source,console-result] +---- +{ + "kwd": ["bar", "foo", "NA"] +} +---- +// TEST[s/^/{"_source":/ s/\n$/}/] include::constant-keyword.asciidoc[] diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index b10484fc5ded8..8f192a165fa17 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -168,15 +168,23 @@ be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. -`text` fields support <> if they have -a <> sub-field that supports synthetic -`_source` or if the `text` field sets `store` to `true`. Either way, it may -not have <>. - -If using a sub-`keyword` field, then the values are sorted in the same way as -a `keyword` field's values are sorted. By default, that means sorted with -duplicates removed. So: -[source,console,id=synthetic-source-text-example-default] +`text` fields can use a <> sub-field to support +<> without storing values of the text field itself. + +In this case, the synthetic source of the `text` field will have the same <> as a `keyword` field. + +These modifications can impact usage of `text` fields: +* Reordering text fields can have an effect on <> + and <> queries. See the discussion about + <> for more detail. You + can avoid this by making sure the `slop` parameter on the phrase queries + is lower than the `position_increment_gap`. This is the default. +* Handling of `null` values is different. `text` fields ignore `null` values, +but `keyword` fields support replacing `null` values with a value specified in the `null_value` parameter. +This replacement is represented in synthetic source. + +For example: +[source,console,id=synthetic-source-text-example-multi-field] ---- PUT idx { @@ -194,8 +202,9 @@ PUT idx "text": { "type": "text", "fields": { - "raw": { - "type": "keyword" + "kwd": { + "type": "keyword", + "null_value": "NA" } } } @@ -205,9 +214,10 @@ PUT idx PUT idx/_doc/1 { "text": [ + null, "the quick brown fox", "the quick brown fox", - "jumped over the lazy dog" + "jumped over the lazy dog", ] } ---- @@ -218,19 +228,14 @@ Will become: ---- { "text": [ - "jumped over the lazy dog", + "jumped over the lazy dog" + "NA", "the quick brown fox" ] } ---- // TEST[s/^/{"_source":/ s/\n$/}/] -NOTE: Reordering text fields can have an effect on <> - and <> queries. See the discussion about - <> for more detail. You - can avoid this by making sure the `slop` parameter on the phrase queries - is lower than the `position_increment_gap`. This is the default. - If the `text` field sets `store` to true then order and duplicates are preserved. [source,console,id=synthetic-source-text-example-stored] @@ -248,7 +253,15 @@ PUT idx }, "mappings": { "properties": { - "text": { "type": "text", "store": true } + "text": { + "type": "text", + "store": true, + "fields": { + "raw": { + "type": "keyword" + } + } + } } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 4991fb3ffae9e..60eac41395ae8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -1075,6 +1075,13 @@ public Builder builder(BlockFactory factory, int expectedCount) { * using whatever */ private BlockSourceReader.LeafIteratorLookup blockReaderDisiLookup(BlockLoaderContext blContext) { + if (isSyntheticSource && syntheticSourceDelegate != null) { + // Since we are using synthetic source and a delegate, we can't use this field + // to determine if the delegate has values in the document (f.e. handling of `null` is different + // between text and keyword). + return BlockSourceReader.lookupMatchingAll(); + } + if (isIndexed()) { if (getTextSearchInfo().hasNorms()) { return BlockSourceReader.lookupFromNorms(name()); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java index 5c9acaf18a45d..77c42740451ee 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java @@ -62,15 +62,8 @@ public static Object expectedValue(Map fieldMapping, Object valu if (params.syntheticSource() && testContext.forceFallbackSyntheticSource() == false && usingSyntheticSourceDelegate) { var nullValue = (String) keywordMultiFieldMapping.get("null_value"); - // Due to how TextFieldMapper#blockReaderDisiLookup works this is complicated. - // If we are using lookupMatchingAll() then we'll see all docs, generate synthetic source using syntheticSourceDelegate, - // parse it and see null_value inside. - // But if we are using lookupFromNorms() we will skip the document (since the text field itself does not exist). - // Same goes for lookupFromFieldNames(). - boolean textFieldIndexed = (boolean) fieldMapping.getOrDefault("index", true); - if (value == null) { - if (textFieldIndexed == false && nullValue != null && nullValue.length() <= (int) ignoreAbove) { + if (nullValue != null && nullValue.length() <= (int) ignoreAbove) { return new BytesRef(nullValue); } @@ -82,12 +75,6 @@ public static Object expectedValue(Map fieldMapping, Object valu } var values = (List) value; - - // See note above about TextFieldMapper#blockReaderDisiLookup. - if (textFieldIndexed && values.stream().allMatch(Objects::isNull)) { - return null; - } - var indexed = values.stream() .map(s -> s == null ? nullValue : s) .filter(Objects::nonNull) From ec0e815c9eb643cd7b93565f0c62f69028adc584 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Tue, 6 May 2025 13:28:16 -0700 Subject: [PATCH 2/4] fix --- docs/reference/mapping/types/text.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 8f192a165fa17..339df5e68fbef 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -217,7 +217,7 @@ PUT idx/_doc/1 null, "the quick brown fox", "the quick brown fox", - "jumped over the lazy dog", + "jumped over the lazy dog" ] } ---- From 943c637d63180879d2d6103bb13a54f38ab61753 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Tue, 6 May 2025 15:18:12 -0700 Subject: [PATCH 3/4] fix --- docs/reference/mapping/types/text.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 339df5e68fbef..d7f429aac3b71 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -228,7 +228,7 @@ Will become: ---- { "text": [ - "jumped over the lazy dog" + "jumped over the lazy dog", "NA", "the quick brown fox" ] From 681a1cba38c0472fc8c054f003b7907cb040c8f5 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Wed, 7 May 2025 11:26:20 -0700 Subject: [PATCH 4/4] fix --- docs/reference/mapping/types/keyword.asciidoc | 2 +- docs/reference/mapping/types/text.asciidoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 30c181f12a4be..0734238c20b9f 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -326,7 +326,7 @@ Will become: [source,console-result] ---- { - "kwd": ["bar", "foo", "NA"] + "kwd": ["NA", "bar", "foo"] } ---- // TEST[s/^/{"_source":/ s/\n$/}/] diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index d7f429aac3b71..ecbcb896d9f69 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -228,8 +228,8 @@ Will become: ---- { "text": [ - "jumped over the lazy dog", "NA", + "jumped over the lazy dog", "the quick brown fox" ] }