Assign the right path to objects merged when parsing mappings (#89389)
When parsing mappings, we may find a field with the same name specified twice, either
because duplicate JSON keys are allowed, or because a mix of object notation and dot
notation is used when providing the mappings. The same can happen when applying dynamic
mappings as part of parsing an incoming document, as well as when merging separate
index templates that contain definitions for the same field using a mix of object
notation and dot notation.
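
A minimal, hypothetical illustration of that first case (not part of this commit), written as a
Java text block the way the tests below define mappings: the JSON declares the leaf field
obj.sub.field twice, once through object notation and once through dot notation, so the two
definitions of obj have to be merged while the mappings are parsed.

class DuplicateFieldNotationExample {
    // Hypothetical mappings, not taken from the commit: both entries below describe the same
    // leaf field "obj.sub.field", once via object notation and once via dot notation.
    static final String MAPPINGS = """
        {
          "_doc": {
            "properties": {
              "obj": {
                "properties": {
                  "sub": {
                    "properties": {
                      "field": { "type": "keyword" }
                    }
                  }
                }
              },
              "obj.sub.field": { "type": "keyword" }
            }
          }
        }
        """;
}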

While we propagate the MapperBuilderContext across merge calls thanks to #86946, we do not
propagate the right context when we call merge on objects as part of parsing/building
mappings. As a result, the leaf fields produced by the merge get a wrong path that is missing
its first portion, e.g. sub.field instead of obj.sub.field.

This commit applies the correct mapper builder context when two objects with the same name
are found and merged while building the object mapper.

Relates to #86946

Closes #88573
javanna committed Aug 17, 2022
1 parent 09d0025 commit c038a91
Showing 9 changed files with 318 additions and 21 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/89389.yaml
@@ -0,0 +1,6 @@
pr: 89389
summary: Assign the right path to objects merged when parsing mappings
area: Mapping
type: bug
issues:
- 88573
@@ -14,6 +14,8 @@
import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesResponse;
import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cluster.ClusterState;
@@ -39,6 +41,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
@@ -1009,4 +1012,88 @@ public void testPartitionedTemplate() throws Exception {
GetSettingsResponse getSettingsResponse = client().admin().indices().prepareGetSettings("test_good").get();
assertEquals("6", getSettingsResponse.getIndexToSettings().get("test_good").get("index.routing_partition_size"));
}

public void testIndexTemplatesWithSameSubfield() {
client().admin()
.indices()
.preparePutTemplate("template_1")
.setPatterns(Collections.singletonList("te*"))
.setSettings(indexSettings())
.setOrder(100)
.setMapping("""
{
"_doc": {
"properties": {
"kwm": {
"properties": {
"source": {
"properties": {
"geo": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}
}
},
"source": {
"properties": {
"geo": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}
}
}
}
""", XContentType.JSON)
.get();

client().admin()
.indices()
.preparePutTemplate("template_2")
.setPatterns(Collections.singletonList("test*"))
.setSettings(indexSettings())
.setOrder(1)
.setMapping("""
{
"_doc": {
"properties": {
"kwm.source.geo": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}
}
""", XContentType.JSON)
.get();

client().prepareIndex("test").setSource().get();
FieldCapabilitiesResponse fieldCapabilitiesResponse = client().prepareFieldCaps("test").setFields("*location").get();
{
Map<String, FieldCapabilities> field = fieldCapabilitiesResponse.getField("kwm.source.geo.location");
assertNotNull(field);
FieldCapabilities fieldCapabilities = field.get("geo_point");
assertTrue(fieldCapabilities.isSearchable());
assertTrue(fieldCapabilities.isAggregatable());
}
{
Map<String, FieldCapabilities> field = fieldCapabilitiesResponse.getField("source.geo.location");
assertNotNull(field);
FieldCapabilities fieldCapabilities = field.get("geo_point");
assertTrue(fieldCapabilities.isSearchable());
assertTrue(fieldCapabilities.isAggregatable());
}
}
}
@@ -170,7 +170,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
}

@Override
public ObjectMapper merge(Mapper mergeWith, MapperService.MergeReason reason, MapperBuilderContext mapperBuilderContext) {
public ObjectMapper merge(Mapper mergeWith, MapperService.MergeReason reason, MapperBuilderContext parentBuilderContext) {
if ((mergeWith instanceof NestedObjectMapper) == false) {
throw new IllegalArgumentException("can't merge a non nested mapping [" + mergeWith.name() + "] with a nested mapping");
}
@@ -191,7 +191,7 @@ public ObjectMapper merge(Mapper mergeWith, MapperService.MergeReason reason, Ma
throw new MapperException("the [include_in_root] parameter can't be updated on a nested object mapping");
}
}
toMerge.doMerge(mergeWithObject, reason, mapperBuilderContext);
toMerge.doMerge(mergeWithObject, reason, parentBuilderContext);
return toMerge;
}

@@ -150,6 +150,11 @@ protected final Map<String, Mapper> buildMappers(boolean root, MapperBuilderCont
assert mapper instanceof ObjectMapper == false || subobjects.value() : "unexpected object while subobjects are disabled";
Mapper existing = mappers.get(mapper.simpleName());
if (existing != null) {
// The same mappings or document may hold the same field twice, either because duplicate JSON keys are allowed or
// because the same field is provided using both the object notation and the dot notation at the same time.
// This can also happen due to multiple index templates being merged into a single mappings definition using
// XContentHelper#mergeDefaults, again in case some index templates contained mappings for the same field using a
// mix of object notation and dot notation.
mapper = existing.merge(mapper, mapperBuilderContext);
}
mappers.put(mapper.simpleName(), mapper);
@@ -426,7 +431,11 @@ public void validate(MappingLookup mappers) {
}
}

public ObjectMapper merge(Mapper mergeWith, MergeReason reason, MapperBuilderContext mapperBuilderContext) {
protected MapperBuilderContext createChildContext(MapperBuilderContext mapperBuilderContext, String name) {
return mapperBuilderContext.createChildContext(name);
}

public ObjectMapper merge(Mapper mergeWith, MergeReason reason, MapperBuilderContext parentBuilderContext) {
if ((mergeWith instanceof ObjectMapper) == false) {
throw new IllegalArgumentException("can't merge a non object mapping [" + mergeWith.name() + "] with an object mapping");
}
@@ -436,12 +445,11 @@ public ObjectMapper merge(Mapper mergeWith, MergeReason reason, MapperBuilderCon
}
ObjectMapper mergeWithObject = (ObjectMapper) mergeWith;
ObjectMapper merged = clone();
merged.doMerge(mergeWithObject, reason, mapperBuilderContext);
merged.doMerge(mergeWithObject, reason, parentBuilderContext);
return merged;
}

protected void doMerge(final ObjectMapper mergeWith, MergeReason reason, MapperBuilderContext mapperBuilderContext) {

protected void doMerge(final ObjectMapper mergeWith, MergeReason reason, MapperBuilderContext parentBuilderContext) {
if (mergeWith.dynamic != null) {
this.dynamic = mergeWith.dynamic;
}
@@ -462,6 +470,7 @@ protected void doMerge(final ObjectMapper mergeWith, MergeReason reason, MapperB
}
}

MapperBuilderContext objectBuilderContext = createChildContext(parentBuilderContext, simpleName());
Map<String, Mapper> mergedMappers = null;
for (Mapper mergeWithMapper : mergeWith) {
Mapper mergeIntoMapper = (mergedMappers == null ? mappers : mergedMappers).get(mergeWithMapper.simpleName());
@@ -470,8 +479,7 @@ protected void doMerge(final ObjectMapper mergeWith, MergeReason reason, MapperB
if (mergeIntoMapper == null) {
merged = mergeWithMapper;
} else if (mergeIntoMapper instanceof ObjectMapper objectMapper) {
MapperBuilderContext childContext = mapperBuilderContext.createChildContext(objectMapper.simpleName());
merged = objectMapper.merge(mergeWithMapper, reason, childContext);
merged = objectMapper.merge(mergeWithMapper, reason, objectBuilderContext);
} else {
assert mergeIntoMapper instanceof FieldMapper || mergeIntoMapper instanceof FieldAliasMapper;
if (mergeWithMapper instanceof ObjectMapper) {
@@ -485,7 +493,7 @@ protected void doMerge(final ObjectMapper mergeWith, MergeReason reason, MapperB
if (reason == MergeReason.INDEX_TEMPLATE) {
merged = mergeWithMapper;
} else {
merged = mergeIntoMapper.merge(mergeWithMapper, mapperBuilderContext);
merged = mergeIntoMapper.merge(mergeWithMapper, objectBuilderContext);
}
}
if (mergedMappers == null) {
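
The doMerge changes above now take the parent's builder context and let each object create its
own child context for its sub-mappers. As a rough standalone sketch (a simplified stand-in, not
Elasticsearch's MapperBuilderContext API), the following shows how such a context accumulates
the path prefix that leaf mappers use for their full name, and how merging a leaf under a
context that never descended through the enclosing object reproduces the sub.field instead of
obj.sub.field symptom described in the commit message.

// Standalone illustration only: "Context" is a hypothetical stand-in, not Elasticsearch's
// MapperBuilderContext; the two println calls mirror the correct and the buggy leaf path.
public class MergePathSketch {

    record Context(String prefix) {
        Context createChildContext(String name) {
            return new Context(prefix.isEmpty() ? name : prefix + "." + name);
        }

        String buildFullName(String leafName) {
            return prefix.isEmpty() ? leafName : prefix + "." + leafName;
        }
    }

    public static void main(String[] args) {
        Context root = new Context("");                          // the root object adds no prefix
        Context obj = root.createChildContext("obj");            // prefix "obj"
        Context sub = obj.createChildContext("sub");             // prefix "obj.sub"
        System.out.println(sub.buildFullName("field"));          // obj.sub.field -- the expected path

        // If the merge happens under a context that skipped the enclosing "obj" object,
        // the leading path segment is lost, which is the symptom this commit fixes:
        Context detachedSub = root.createChildContext("sub");    // prefix "sub"
        System.out.println(detachedSub.buildFullName("field"));  // sub.field -- the wrong path
    }
}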
@@ -322,13 +322,19 @@ RuntimeField getRuntimeField(String name) {
}

@Override
public RootObjectMapper merge(Mapper mergeWith, MergeReason reason, MapperBuilderContext mapperBuilderContext) {
return (RootObjectMapper) super.merge(mergeWith, reason, mapperBuilderContext);
protected MapperBuilderContext createChildContext(MapperBuilderContext mapperBuilderContext, String name) {
assert mapperBuilderContext == MapperBuilderContext.ROOT;
return mapperBuilderContext;
}

@Override
protected void doMerge(ObjectMapper mergeWith, MergeReason reason, MapperBuilderContext mapperBuilderContext) {
super.doMerge(mergeWith, reason, mapperBuilderContext);
public RootObjectMapper merge(Mapper mergeWith, MergeReason reason, MapperBuilderContext parentBuilderContext) {
return (RootObjectMapper) super.merge(mergeWith, reason, parentBuilderContext);
}

@Override
protected void doMerge(ObjectMapper mergeWith, MergeReason reason, MapperBuilderContext parentBuilderContext) {
super.doMerge(mergeWith, reason, parentBuilderContext);
RootObjectMapper mergeWithObject = (RootObjectMapper) mergeWith;
if (mergeWithObject.numericDetection.explicit()) {
this.numericDetection = mergeWithObject.numericDetection;
@@ -33,6 +33,7 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -2351,6 +2352,61 @@ public void testDocumentDescriptionInTsdb() throws IOException {
}
}

public void testMergeSubfieldWhileBuildingMappers() throws Exception {
MapperService mapperService = createMapperService();
/*
We had a bug (https://github.com/elastic/elasticsearch/issues/88573) building an object mapper (ObjectMapper.Builder#buildMappers).
A sub-field that already exists is merged with the existing one. As a result, the leaf field would get the wrong field path
(missing the first portion of its path). The only way to trigger this scenario for dynamic mappings is to either allow duplicate
JSON keys or ingest the same field with dots collapsed as well as expanded within the same document. Note that the two fields with
same name need to be part of the same mappings (hence the same document). If they are in two distinct mappings they are properly
merged as part of RootObjectMapper#merge.
*/
ParsedDocument doc = mapperService.documentMapper().parse(source("""
{
"foo" : {
"bar" : {
"baz" : 1
}
},
"foo.bar.baz" : 2
}
"""));
Mapping mapping = doc.dynamicMappingsUpdate();
assertNotNull(mapping);
Mapper fooMapper = mapping.getRoot().getMapper("foo");
assertNotNull(fooMapper);
assertTrue(fooMapper instanceof ObjectMapper);
Mapper barMapper = ((ObjectMapper) fooMapper).getMapper("bar");
assertTrue(barMapper instanceof ObjectMapper);
Mapper baz = ((ObjectMapper) barMapper).getMapper("baz");
assertNotNull(baz);
assertEquals("foo.bar.baz", baz.name());
assertEquals("baz", baz.simpleName());
IndexableField[] fields = doc.rootDoc().getFields("foo.bar.baz");
assertEquals(4, fields.length);
long[] longs = Arrays.stream(fields).mapToLong(value -> value.numericValue().longValue()).toArray();
assertArrayEquals(new long[] { 1, 1, 2, 2 }, longs);

// merge without going through toXContent and reparsing, otherwise the potential leaf path issue gets fixed on its own
Mapping newMapping = MapperService.mergeMappings(mapperService.documentMapper(), mapping, MapperService.MergeReason.MAPPING_UPDATE);
DocumentMapper newDocMapper = new DocumentMapper(mapperService.documentParser(), newMapping, newMapping.toCompressedXContent());
ParsedDocument doc2 = newDocMapper.parse(source("""
{
"foo" : {
"bar" : {
"baz" : 10
}
}
}
"""));
assertNull(doc2.dynamicMappingsUpdate());
IndexableField[] fields2 = doc2.rootDoc().getFields("foo.bar.baz");
assertEquals(2, fields2.length);
long[] longs2 = Arrays.stream(fields2).mapToLong(value -> value.numericValue().longValue()).toArray();
assertArrayEquals(new long[] { 10, 10 }, longs2);
}

/**
* Mapper plugin providing a mock metadata field mapper implementation that supports setting its value
*/
@@ -8,8 +8,10 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
@@ -360,4 +362,68 @@ public void testMultiFieldChecks() throws IOException {
assertFalse(mapperService.isMultiField("object.subfield1"));
}

public void testMergeObjectSubfieldWhileParsing() throws IOException {
/*
If we are parsing mappings that hold the definition of the same field twice, the two are merged together. This can happen when
mappings have the same field specified using the object notation as well as the dot notation, as well as when applying index
templates, in which case the two definitions may come from separate index templates that end up in the same map (through
XContentHelper#mergeDefaults, see MetadataCreateIndexService#parseV1Mappings).
We had a bug (https://github.com/elastic/elasticsearch/issues/88573) triggered by this scenario that caused the merged leaf fields
to get the wrong path (missing the first portion).
*/
MapperService mapperService = createMapperService("""
{
"_doc": {
"properties": {
"obj": {
"properties": {
"sub": {
"properties": {
"string": {
"type": "keyword"
}
}
}
}
},
"obj.sub.string" : {
"type" : "keyword"
}
}
}
}
""");

assertNotNull(mapperService.mappingLookup().getMapper("obj.sub.string"));
MappedFieldType fieldType = mapperService.mappingLookup().getFieldType("obj.sub.string");
assertNotNull(fieldType);
assertEquals("""
{
"_doc" : {
"properties" : {
"obj" : {
"properties" : {
"sub" : {
"properties" : {
"string" : {
"type" : "keyword"
}
}
}
}
}
}
}
}""", Strings.toString(mapperService.documentMapper().mapping(), true, true));

// check that with the resulting mappings a new document has the previously merged field indexed properly
ParsedDocument parsedDocument = mapperService.documentMapper().parse(source("""
{
"obj.sub.string" : "value"
}"""));

assertNull(parsedDocument.dynamicMappingsUpdate());
IndexableField[] fields = parsedDocument.rootDoc().getFields("obj.sub.string");
assertEquals(2, fields.length);
}
}
