Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/133611.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 133611
summary: Allow trailing empty string field names in paths of flattened field
area: Mapping
type: bug
issues:
- 130139
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ private KeyValue(final String value, final Prefix prefix, final String leaf) {

KeyValue(final BytesRef keyValue) {
this(
FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN),
// Splitting with a negative limit includes trailing empty strings.
// This is needed in case the provide path has trailing path separators.
FlattenedFieldParser.extractKey(keyValue).utf8ToString().split(PATH_SEPARATOR_PATTERN, -1),
FlattenedFieldParser.extractValue(keyValue).utf8ToString()
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,56 @@ public void testSyntheticSourceWithEmptyObject() throws IOException {
{"field":{"key1":"foo"}}"""));
}

public void testSyntheticSourceWithMatchesInNestedPath() throws IOException {
DocumentMapper mapper = createSytheticSourceMapperService(
mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
).documentMapper();

// This test covers a scenario that previously had a bug.
// Since a.b.c and b.b.d have a matching middle key `b`, and b.b.d starts with a `b`,
// startObject was not called for the first `b` in b.b.d.
// For a full explanation see this comment: https://github.com/elastic/elasticsearch/pull/129600#issuecomment-3024476134
var syntheticSource = syntheticSource(mapper, b -> {
b.startObject("field");
{
b.startObject("a");
{
b.startObject("b").field("c", "1").endObject();
}
b.endObject();
b.startObject("b");
{
b.startObject("b").field("d", "2").endObject();
}
b.endObject();
}
b.endObject();
});
assertThat(syntheticSource, equalTo("""
{"field":{"a":{"b":{"c":"1"}},"b":{"b":{"d":"2"}}}}"""));
}

public void testMultipleDotsInPath() throws IOException {
DocumentMapper mapper = createSytheticSourceMapperService(
mapping(b -> { b.startObject("field").field("type", "flattened").endObject(); })
).documentMapper();

var syntheticSource = syntheticSource(mapper, b -> {
b.startObject("field");
{
b.startObject(".");
{
b.field(".", "bar");
}
b.endObject();
}
b.endObject();
});
// This behavior is weird to say the least. But this is the only reasonable way to interpret the meaning of the path `...`
assertThat(syntheticSource, equalTo("""
{"field":{"":{"":{"":{"":"bar"}}}}}"""));
}

@Override
protected boolean supportsCopyTo() {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -246,6 +247,54 @@ public void testScalarObjectMismatchInNestedObject() throws IOException {
assertEquals("{\"a\":{\"b\":{\"c\":\"10\",\"c.d\":\"20\"}}}", baos.toString(StandardCharsets.UTF_8));
}

public void testSingleDotPath() throws IOException {
// GIVEN
final SortedSetDocValues dv = mock(SortedSetDocValues.class);
final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
final List<byte[]> bytes = Stream.of("." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
when(dv.docValueCount()).thenReturn(bytes.size());
for (int i = 0; i < bytes.size(); i++) {
when(dv.nextOrd()).thenReturn((long) i);
when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
}

// WHEN
builder.startObject();
writer.write(builder);
builder.endObject();
builder.flush();

// THEN
assertEquals("{\"\":{\"\":\"10\"}}", baos.toString(StandardCharsets.UTF_8));
}

public void testTrailingDotsPath() throws IOException {
// GIVEN
final SortedSetDocValues dv = mock(SortedSetDocValues.class);
final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
final List<byte[]> bytes = Stream.of("cat.." + '\0' + "10").map(x -> x.getBytes(StandardCharsets.UTF_8)).toList();
when(dv.getValueCount()).thenReturn(Long.valueOf(bytes.size()));
when(dv.docValueCount()).thenReturn(bytes.size());
for (int i = 0; i < bytes.size(); i++) {
when(dv.nextOrd()).thenReturn((long) i);
when(dv.lookupOrd(ArgumentMatchers.eq((long) i))).thenReturn(new BytesRef(bytes.get(i), 0, bytes.get(i).length));
}

// WHEN
builder.startObject();
writer.write(builder);
builder.endObject();
builder.flush();

// THEN
assertEquals("{\"cat\":{\"\":{\"\":\"10\"}}}", baos.toString(StandardCharsets.UTF_8));
}

private class SortedSetSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues {
private final SortedSetDocValues dv;
private int seen = 0;
Expand Down