From 6c60ed61dbd76b2da734981c93fc8e5667ae18c0 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 27 Aug 2025 11:05:25 -0500 Subject: [PATCH] Allow trailing empty string field names in paths of flattened field (#133611) Flattened objects allow for keys with leading and trailing path separators (the period character .). They also allow field names consisting only of path separators. These fields names are then joined by the path separator and whole paths are stored with the associated value as key value pairs. The whole paths are then separated back into their component fields by splitting on the path separator. There is ambiguity whether a given period is meant as a path separator or part of a path name. Because of this, we assume that all periods are path separators. This means that any two adjacent periods in the whole path have an empty string field name between them. The same is true of leading and trailing periods. This is how we already handle adjacent periods that are at the front or middle of the whole path. But due to how the split function works, this is not how trailing adjacent periods are handled. By default the split function does not return trailing empty strings. By adding a negative limit to split, any trailing empty strings are now returned. Thus trailing adjacent periods are now treated the same as leading periods or periods in the middle of the string. More importantly, if the whole path consists of periods, the resulting path string was previously empty, causing an ArrayOutOfBoundsException. With this change, any paths consisting solely of periods will be treated has having empty string field names before and after every period. Fixes #130139 (cherry picked from commit 4661d06a155ad8fdabb3b103f22c7322788721ca) # Conflicts: # server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java --- docs/changelog/133611.yaml | 6 +++ .../FlattenedFieldSyntheticWriterHelper.java | 4 +- .../flattened/FlattenedFieldMapperTests.java | 50 +++++++++++++++++++ ...ttenedFieldSyntheticWriterHelperTests.java | 49 ++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/133611.yaml diff --git a/docs/changelog/133611.yaml b/docs/changelog/133611.yaml new file mode 100644 index 0000000000000..9b86f75d9d276 --- /dev/null +++ b/docs/changelog/133611.yaml @@ -0,0 +1,6 @@ +pr: 133611 +summary: Allow trailing empty string field names in paths of flattened field +area: Mapping +type: bug +issues: + - 130139 diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java index aa1805d485210..2f1efd970d637 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java @@ -116,7 +116,9 @@ private KeyValue(final String value, final Prefix prefix, final String leaf) { KeyValue(final BytesRef keyValue) { this( - FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN), + // Splitting with a negative limit includes trailing empty strings. + // This is needed in case the provide path has trailing path separators. + FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN, -1), FlattenedFieldParser.extractValue(keyValue).utf8ToString() ); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java index c6062efc744d9..0628ca47308ea 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java @@ -931,6 +931,56 @@ public void testSyntheticSourceWithEmptyObject() throws IOException { {"field":{"key1":"foo"}}""")); } + public void testSyntheticSourceWithMatchesInNestedPath() throws IOException { + DocumentMapper mapper = createSytheticSourceMapperService( + mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); }) + ).documentMapper(); + + // This test covers a scenario that previously had a bug. + // Since a.b.c and b.b.d have a matching middle key `b`, and b.b.d starts with a `b`, + // startObject was not called for the first `b` in b.b.d. + // For a full explanation see this comment: https://github.com/elastic/elasticsearch/pull/129600#issuecomment-3024476134 + var syntheticSource = syntheticSource(mapper, b -> { + b.startObject("field"); + { + b.startObject("a"); + { + b.startObject("b").field("c", "1").endObject(); + } + b.endObject(); + b.startObject("b"); + { + b.startObject("b").field("d", "2").endObject(); + } + b.endObject(); + } + b.endObject(); + }); + assertThat(syntheticSource, equalTo(""" + {"field":{"a":{"b":{"c":"1"}},"b":{"b":{"d":"2"}}}}""")); + } + + public void testMultipleDotsInPath() throws IOException { + DocumentMapper mapper = createSytheticSourceMapperService( + mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); }) + ).documentMapper(); + + var syntheticSource = syntheticSource(mapper, b -> { + b.startObject("field"); + { + b.startObject("."); + { + b.field(".", "bar"); + } + b.endObject(); + } + b.endObject(); + }); + // This behavior is weird to say the least. But this is the only reasonable way to interpret the meaning of the path `...` + assertThat(syntheticSource, equalTo(""" + {"field":{"":{"":{"":{"":"bar"}}}}}""")); + } + @Override protected boolean supportsCopyTo() { return false; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java index 4ca5da1c42d40..f59640194f8cd 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java @@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets; import java.util.List; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -246,6 +247,54 @@ public void testScalarObjectMismatchInNestedObject() throws IOException { assertEquals("{\"a\":{\"b\":{\"c\":\"10\",\"c.d\":\"20\"}}}", baos.toString(StandardCharsets.UTF_8)); } + public void testSingleDotPath() throws IOException { + // GIVEN + final SortedSetDocValues dv = mock(SortedSetDocValues.class); + final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv)); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos); + final List bytes = Stream.of("." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList(); + when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size())); + when(dv.docValueCount()).thenReturn(bytes.size()); + for (int i = 0; i < bytes.size(); i++) { + when(dv.nextOrd()).thenReturn((long) i); + when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length)); + } + + // WHEN + builder.startObject(); + writer.write(builder); + builder.endObject(); + builder.flush(); + + // THEN + assertEquals("{\"\":{\"\":\"10\"}}", baos.toString(StandardCharsets.UTF_8)); + } + + public void testTrailingDotsPath() throws IOException { + // GIVEN + final SortedSetDocValues dv = mock(SortedSetDocValues.class); + final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv)); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos); + final List bytes = Stream.of("cat.." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList(); + when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size())); + when(dv.docValueCount()).thenReturn(bytes.size()); + for (int i = 0; i < bytes.size(); i++) { + when(dv.nextOrd()).thenReturn((long) i); + when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length)); + } + + // WHEN + builder.startObject(); + writer.write(builder); + builder.endObject(); + builder.flush(); + + // THEN + assertEquals("{\"cat\":{\"\":{\"\":\"10\"}}}", baos.toString(StandardCharsets.UTF_8)); + } + private class SortedSetSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues { private final SortedSetDocValues dv; private int seen = 0;