Skip to content

Commit

Permalink
Automatically flatten objects when subobjects:false (#97972)
Browse files Browse the repository at this point in the history
When ingesting documents that contain nested objects while the mapping
parameter subobjects is set to false, we no longer throw a mapping
exception and drop the document(s). Instead, we map only the leaf
field(s), naming each one by its full path with segments separated by dots.
  • Loading branch information
piergm committed Aug 24, 2023
1 parent bb1dad8 commit 392c497
Show file tree
Hide file tree
Showing 7 changed files with 755 additions and 39 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/97972.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 97972
summary: Automatically flatten objects when subobjects:false
area: Mapping
type: enhancement
issues:
- 88934
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.xcontent;

import java.io.IOException;

/**
 * An {@link XContentSubParser} that flattens field names: names reported for
 * fields at level 1 are prefixed with a fixed parent name and a dot.
 */
public class FlatteningXContentParser extends XContentSubParser {
    private static final char DELIMITER = '.';
    private final String parentName;

    /**
     * Wraps an existing parser and remembers the prefix to apply to field names.
     *
     * @param parser     the parser to wrap; it is handed to the {@link XContentSubParser} constructor
     * @param parentName the name prepended, followed by a dot, to level-1 field names
     */
    public FlatteningXContentParser(XContentParser parser, String parentName) {
        super(parser);
        this.parentName = parentName;
    }

    /**
     * Returns the current field name. When {@code level()} is 1 the delegate's name is
     * returned as {@code parentName + '.' + name}; at any other level the delegate's
     * name is returned unchanged.
     *
     * @return the current field name, prefixed with the parent name at level 1
     * @throws IOException if the delegate fails while reading the current name
     */
    @Override
    public String currentName() throws IOException {
        if (level() != 1) {
            // Not at level 1: report the name exactly as the delegate sees it.
            return delegate().currentName();
        }
        StringBuilder prefixed = new StringBuilder(parentName);
        prefixed.append(DELIMITER).append(delegate().currentName());
        return prefixed.toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,8 @@ public void close() throws IOException {
}
}
}

/**
 * Exposes the value of the {@code level} field to other classes in this package.
 *
 * @return the current value of {@code level}
 */
int level() {
    return this.level;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,41 @@ public void testSubParserObject() throws IOException {
}
}

/**
 * Walks a small JSON document through a {@code FlatteningXContentParser} created on the
 * "parent" object and checks which field names carry the "parent." prefix.
 *
 * @throws IOException if parsing the test content fails
 */
public void testFlatteningParserObject() throws IOException {
    String content = """
        {
        "parent": {
        "child1" : 1,
        "child2": {
        "grandChild" : 1
        },
        "child3" : 1
        }
        }
        """;
    // try-with-resources so the underlying parser is always closed, even when an assertion fails
    try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
        assertEquals("parent", parser.currentName());
        assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
        // The flattening parser is created while positioned on the START_OBJECT of "parent"
        XContentParser subParser = new FlatteningXContentParser(parser, parser.currentName());
        assertEquals(XContentParser.Token.FIELD_NAME, subParser.nextToken());
        assertEquals("parent.child1", subParser.currentName());
        assertEquals(XContentParser.Token.VALUE_NUMBER, subParser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, subParser.nextToken());
        String secondChildName = subParser.currentName();
        assertEquals("parent.child2", secondChildName);
        assertEquals(XContentParser.Token.START_OBJECT, subParser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, subParser.nextToken());
        // Fields below the immediate children are reported without the prefix
        assertEquals("grandChild", subParser.currentName());
        assertEquals(XContentParser.Token.VALUE_NUMBER, subParser.nextToken());
        assertEquals(XContentParser.Token.END_OBJECT, subParser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, subParser.nextToken());
        // Back among the immediate children, the prefix applies again
        assertEquals("parent.child3", subParser.currentName());
        assertEquals(XContentParser.Token.VALUE_NUMBER, subParser.nextToken());
    }
}

public void testSubParserArray() throws IOException {
XContentBuilder builder = XContentFactory.jsonBuilder();
int numberOfArrayElements = randomInt(10);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,15 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
context = context.createChildContext(objectMapper);
parseObjectOrNested(context);
} else if (mapper instanceof FieldMapper fieldMapper) {
fieldMapper.parse(context);
if (shouldFlattenObject(context, fieldMapper)) {
// we pass the mapper's simpleName as parentName to the new DocumentParserContext
String currentFieldName = fieldMapper.simpleName();
context.path().remove();
parseObjectOrNested(context.createFlattenContext(currentFieldName));
context.path().add(currentFieldName);
} else {
fieldMapper.parse(context);
}
if (context.isWithinCopyTo() == false) {
List<String> copyToFields = fieldMapper.copyTo().copyToFields();
if (copyToFields.isEmpty() == false) {
Expand All @@ -415,6 +423,12 @@ static void parseObjectOrField(DocumentParserContext context, Mapper mapper) thr
}
}

/**
 * Tells whether the value about to be parsed for the given field mapper should be flattened.
 * That is the case only when the parser is positioned on the start of an object, the parent
 * of the context has subobjects disabled, and the field mapper does not support parsing objects.
 *
 * @param context     the current document parsing context
 * @param fieldMapper the field mapper the current value was resolved to
 * @return {@code true} if all three conditions hold, {@code false} otherwise
 */
private static boolean shouldFlattenObject(DocumentParserContext context, FieldMapper fieldMapper) {
    if (context.parser().currentToken() != XContentParser.Token.START_OBJECT) {
        return false;
    }
    if (context.parent().subobjects()) {
        return false;
    }
    return fieldMapper.supportsParsingObject() == false;
}

private static void throwOnUnrecognizedMapperType(Mapper mapper) {
throw new IllegalStateException(
"The provided mapper [" + mapper.name() + "] has an unrecognized type [" + mapper.getClass().getSimpleName() + "]."
Expand Down Expand Up @@ -472,7 +486,6 @@ private static void parseObjectDynamic(DocumentParserContext context, String cur
dynamicObjectMapper = new NoOpObjectMapper(currentFieldName, context.path().pathAsText(currentFieldName));
} else {
dynamicObjectMapper = DynamicFieldsBuilder.createDynamicObjectMapper(context, currentFieldName);
context.addDynamicMapper(dynamicObjectMapper);
}
if (context.parent().subobjects() == false) {
if (dynamicObjectMapper instanceof NestedObjectMapper) {
Expand All @@ -486,15 +499,16 @@ private static void parseObjectDynamic(DocumentParserContext context, String cur
);
}
if (dynamicObjectMapper instanceof ObjectMapper) {
throw new DocumentParsingException(
context.parser().getTokenLocation(),
"Tried to add subobject ["
+ dynamicObjectMapper.simpleName()
+ "] to object ["
+ context.parent().name()
+ "] which does not support subobjects"
);
// We have an ObjectMapper but subobjects are disallowed
// therefore we create a new DocumentParserContext that
// prepends currentFieldName to any immediate children.
parseObjectOrNested(context.createFlattenContext(currentFieldName));
return;
}

}
if (context.dynamic() != ObjectMapper.Dynamic.RUNTIME) {
context.addDynamicMapper(dynamicObjectMapper);
}
if (dynamicObjectMapper instanceof NestedObjectMapper && context.isWithinCopyTo()) {
throwOnCreateDynamicNestedViaCopyTo(dynamicObjectMapper, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.xcontent.FilterXContentParserWrapper;
import org.elasticsearch.xcontent.FlatteningXContentParser;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
Expand Down Expand Up @@ -446,6 +447,20 @@ public LuceneDocument doc() {
};
}

/**
 * Creates a context for flattening subobjects. The returned context wraps this one and
 * exposes a {@link FlatteningXContentParser} built from this context's parser and the
 * given field name.
 *
 * @param fieldName the name of the field to be flattened, used as the parent name of the flattening parser
 * @return a wrapping context whose {@code parser()} returns the flattening parser
 */
public final DocumentParserContext createFlattenContext(String fieldName) {
    // Built once up front so every call to parser() on the returned context sees the same instance.
    final XContentParser prefixingParser = new FlatteningXContentParser(parser(), fieldName);
    final DocumentParserContext flattenContext = new Wrapper(this.parent(), this) {
        @Override
        public XContentParser parser() {
            return prefixingParser;
        }
    };
    return flattenContext;
}

/**
* @deprecated we are actively deprecating and removing the ability to pass
* complex objects to multifields, so try and avoid using this method
Expand Down

0 comments on commit 392c497

Please sign in to comment.