Completion field to support multiple completion multi-fields (#83595)

The completion field supports multi-fields. When a sub-field is another completion field, the whole object structure from the incoming document needs to be provided as input to the sub-field while parsing it. We have a special XContentParser for this scenario, but it does not handle returning the same object structure multiple times. That is why you can have only one completion sub-field within a completion field, and the error returned when this mechanism breaks is a general parsing error (mentioning a field called dummy_field) that makes users think they have done something wrong in their document.
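
For illustration, a minimal sketch of the mapping shape this change is about (the field names are made up, not taken from the commit): a completion field declaring more than one completion sub-field, which previously triggered the dummy_field parsing error at index time.

class CompletionMultiFieldsMappingSketch {
    // Hypothetical mapping: a completion field with two completion sub-fields.
    // Before this fix, indexing a document into such a mapping failed with a
    // general parsing error that mentioned a field called "dummy_field".
    static final String MAPPING = """
        {
          "properties": {
            "suggest": {
              "type": "completion",
              "fields": {
                "suggest_variant_a": { "type": "completion" },
                "suggest_variant_b": { "type": "completion" }
              }
            }
          }
        }
        """;
}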

This commit expands testing for this scenario and extends the parser to support it.

As part of this change, a new parser is created for each sub-field, which makes it possible to expose the same object structure multiple times, for instance when a completion field has more than one completion sub-field.
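
As a rough illustration of the parser-per-sub-field idea (a sketch, not code from this commit; the metadata values and the loop are made up), each sub-field can be handed its own MapXContentParser over the same map, standing in for CompletionInputMetadata#toMap(), so the structure can be walked from its START_OBJECT as many times as there are sub-fields:

import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

class ParserPerSubFieldSketch {
    public static void main(String[] args) throws IOException {
        // Stand-in for CompletionInputMetadata#toMap(): the object structure (input, weight, ...)
        // that every completion sub-field needs to re-read.
        Map<String, Object> metadata = Map.of("input", List.of("nevermind", "nirvana"), "weight", 34);

        // One fresh parser per sub-field: the same structure is exposed again each time.
        for (int subField = 0; subField < 2; subField++) {
            try (
                XContentParser parser = new MapXContentParser(
                    NamedXContentRegistry.EMPTY,
                    DeprecationHandler.IGNORE_DEPRECATIONS,
                    metadata,
                    XContentType.JSON
                )
            ) {
                while (parser.nextToken() != null) {
                    // each sub-field consumes the full object independently
                }
            }
        }
    }
}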

Additionally, the wrapping of both the geo_point multi-field parser and the completion multi-field parser into a dummy_field object is removed, in favour of returning the correct currentName of the main field being parsed.

Finally, getTokenLocation is tweaked to return the location of the field being parsed in the incoming document, so that error messages become clearer when things go wrong.

Closes #83534
javanna committed Feb 11, 2022
1 parent 62943fe commit a91e692
Showing 8 changed files with 381 additions and 93 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/83595.yaml
@@ -0,0 +1,6 @@
pr: 83595
summary: Completion field to support multiple completion multi-fields
area: Mapping
type: bug
issues:
- 83534
MapXContentParser.java
@@ -11,14 +11,12 @@
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentLocation;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -28,23 +26,10 @@
*/
public class MapXContentParser extends AbstractXContentParser {

private XContentType xContentType;
private final XContentType xContentType;
private TokenIterator iterator;
private boolean closed;

public static XContentParser wrapObject(Object sourceMap) throws IOException {
XContentParser parser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
Collections.singletonMap("dummy_field", sourceMap),
XContentType.JSON
);
parser.nextToken(); // start object
parser.nextToken(); // field name
parser.nextToken(); // field value
return parser;
}

public MapXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
AbstractGeometryFieldMapper.java
@@ -13,7 +13,10 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
@@ -53,7 +56,7 @@ public abstract void parse(XContentParser parser, CheckedConsumer<T, IOException
throws IOException;

private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
try (XContentParser parser = MapXContentParser.wrapObject(sourceMap)) {
try (XContentParser parser = wrapObject(sourceMap)) {
parse(parser, v -> consumer.accept(normalizeFromSource(v)), e -> {}); /* ignore malformed */
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -67,6 +70,18 @@ private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
// TODO: move geometry normalization to the geometry parser.
public abstract T normalizeFromSource(T geometry);

private static XContentParser wrapObject(Object sourceMap) throws IOException {
XContentParser parser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
Collections.singletonMap("dummy_field", sourceMap),
XContentType.JSON
);
parser.nextToken(); // start object
parser.nextToken(); // field name
parser.nextToken(); // field value
return parser;
}
}

public abstract static class AbstractGeometryFieldType<T> extends MappedFieldType {
CompletionFieldMapper.java
@@ -30,11 +30,15 @@
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;
import org.elasticsearch.xcontent.FilterXContentParser;
import org.elasticsearch.xcontent.DelegatingXContentParser;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentLocation;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParser.NumberType;
import org.elasticsearch.xcontent.XContentParser.Token;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
@@ -425,8 +429,11 @@ public void parse(DocumentParserContext context) throws IOException {

context.addToFieldNames(fieldType().name());
for (CompletionInputMetadata metadata : inputMap.values()) {
DocumentParserContext externalValueContext = context.switchParser(new CompletionParser(metadata));
multiFields.parse(this, externalValueContext);
multiFields.parse(
this,
context,
() -> context.switchParser(new MultiFieldParser(metadata, fieldType().name(), context.parser().getTokenLocation()))
);
}
}

@@ -586,28 +593,99 @@ public void doValidate(MappingLookup mappers) {
}
}

private static class CompletionParser extends FilterXContentParser {
/**
* Parser that exposes the expected format depending on the type of multi-field that is consuming content.
* Completion fields can hold multi-fields, which can either parse a simple string value or an object in case of another completion
* field. This parser detects which of the two is parsing content and exposes the full object when needed (including input, weight
* and context if available), otherwise the input value only.
*
* A few assumptions are made that make this work:
* 1) only string values are supported for a completion field, hence only sub-fields that parse strings are supported
* 2) sub-fields that parse simple values only ever call {@link #textOrNull()} to do so. They may call {@link #currentToken()} only to
* check if there's a null value, which is irrelevant in the multi-fields scenario as null values are ignored in the parent field and
* don't lead to any field creation.
* 3) completion is the only sub-field type that may be parsing the object structure.
*
* The parser is set to expose the simple value by default, unless {@link #nextToken()} is called, which signals that the
* consumer supports the object structure.
*/
// This parser changes behaviour depending on which methods are called by consumers, which is extremely delicate. This kind of works for
// our internal mappers, but what about mappers from plugins?
static class MultiFieldParser extends DelegatingXContentParser {
private final String textValue;
private final String fieldName;
private final XContentLocation locationOffset;
private final XContentParser fullObjectParser;
// we assume that the consumer is parsing values, we will switch to exposing the object format if nextToken is called
private boolean parsingObject = false;

MultiFieldParser(CompletionInputMetadata metadata, String fieldName, XContentLocation locationOffset) {
this.fullObjectParser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
metadata.toMap(),
XContentType.JSON
);
this.fieldName = fieldName;
this.locationOffset = locationOffset;
this.textValue = metadata.input;
}

boolean advanced = false;
final String textValue;
@Override
protected XContentParser delegate() {
// if consumers are only reading values, they should never go through delegate and rather call the
// overridden currentToken and textOrNull below that don't call super
assert parsingObject;
return fullObjectParser;
}

private CompletionParser(CompletionInputMetadata metadata) throws IOException {
super(MapXContentParser.wrapObject(metadata.toMap()));
this.textValue = metadata.input;
@Override
public Token currentToken() {
if (parsingObject == false) {
// nextToken has not been called, it may or may not be called at a later time.
// What we return does not really matter for mappers that support simple values, as they only check for VALUE_NULL.
// For mappers that do support objects, START_OBJECT is a good choice.
return Token.START_OBJECT;
}
return super.currentToken();
}

@Override
public String textOrNull() throws IOException {
if (advanced == false) {
if (parsingObject == false) {
return textValue;
}
return super.textOrNull();
}

@Override
public Token nextToken() throws IOException {
advanced = true;
if (parsingObject == false) {
// a completion sub-field is parsing
parsingObject = true;
// move to START_OBJECT, currentToken has already returned START_OBJECT and we will advance one token further just below
this.fullObjectParser.nextToken();
}
return super.nextToken();
}

@Override
public String currentName() throws IOException {
if (parsingObject == false) {
return fieldName;
}
String currentName = super.currentName();
if (currentName == null && currentToken() == Token.END_OBJECT) {
return fieldName;
}
return currentName;
}

@Override
public XContentLocation getTokenLocation() {
// return fixed token location: it's not possible to match the token location while parsing through the object structure,
// because completion metadata have been rewritten hence they won't match the incoming document
return locationOffset;
}
}
}
FieldMapper.java
@@ -255,7 +255,7 @@ public void parse(DocumentParserContext context) throws IOException {
valuePreview
);
}
multiFields.parse(this, context);
multiFields.parse(this, context, () -> context);
}

/**
Expand Down Expand Up @@ -449,7 +449,7 @@ public final Map<String, NamedAnalyzer> indexAnalyzers() {
return indexAnalyzers;
}

public static class MultiFields implements Iterable<FieldMapper>, ToXContent {
public static final class MultiFields implements Iterable<FieldMapper>, ToXContent {

private static final MultiFields EMPTY = new MultiFields(Collections.emptyMap());

@@ -507,16 +507,16 @@ private MultiFields(Map<String, FieldMapper> mappers) {
this.mappers = mappers;
}

public void parse(FieldMapper mainField, DocumentParserContext context) throws IOException {
public void parse(FieldMapper mainField, DocumentParserContext context, Supplier<DocumentParserContext> multiFieldContextSupplier)
throws IOException {
// TODO: multi fields are really just copy fields, we just need to expose "sub fields" or something that can be part
// of the mappings
if (mappers.isEmpty()) {
return;
}

context.path().add(mainField.simpleName());
for (FieldMapper mapper : mappers.values()) {
mapper.parse(context);
mapper.parse(multiFieldContextSupplier.get());
}
context.path().remove();
}
GeoPointFieldMapper.java
@@ -42,9 +42,9 @@
import org.elasticsearch.search.lookup.FieldValues;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.runtime.GeoPointScriptFieldDistanceFeatureQuery;
import org.elasticsearch.xcontent.FilterXContentParser;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
import java.io.UncheckedIOException;
@@ -215,7 +215,38 @@ protected void index(DocumentParserContext context, GeoPoint geometry) throws IO
context.doc().add(new StoredField(fieldType().name(), geometry.toString()));
}
// TODO phase out geohash (which is currently used in the CompletionSuggester)
multiFields.parse(this, context.switchParser(MapXContentParser.wrapObject(geometry.geohash())));
// we only expose the geohash value and disallow advancing tokens, hence we can reuse the same parser throughout multiple sub-fields
DocumentParserContext parserContext = context.switchParser(new GeoHashMultiFieldParser(context.parser(), geometry.geohash()));
multiFields.parse(this, context, () -> parserContext);
}

/**
* Parser that pretends to be the main document parser, but exposes the provided geohash regardless of how the geopoint was provided
* in the incoming document. We rely on the fact that consumers only ever call {@link XContentParser#textOrNull()} and never
* advance tokens, which is explicitly disallowed by this parser.
*/
static class GeoHashMultiFieldParser extends FilterXContentParser {
private final String value;

GeoHashMultiFieldParser(XContentParser innerParser, String value) {
super(innerParser);
this.value = value;
}

@Override
public String textOrNull() throws IOException {
return value;
}

@Override
public Token currentToken() {
return Token.VALUE_STRING;
}

@Override
public Token nextToken() throws IOException {
throw new UnsupportedOperationException();
}
}

@Override
Expand Down
