Completion field to support multiple completion multi-fields (#83595)

The completion field supports multi-fields. When a sub-field is another completion field, the whole object structure from the incoming document needs to be provided as input to the sub-field while parsing it. We have a special XContentParser for this scenario, but it does not handle returning the same object structure multiple times. That is why you can have only one completion sub-field within a completion field, and the error returned when this mechanism breaks is a general parsing error (mentioning a field called dummy_field) that makes users think they have done something wrong in their document.
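
For illustration, a minimal sketch of the mapping shape this change is about (the field names are made up, not taken from the commit): a completion field declaring more than one completion sub-field, which previously triggered the dummy_field parsing error at index time.

class CompletionMultiFieldsMappingSketch {
    // Hypothetical mapping: a completion field with two completion sub-fields.
    // Before this fix, indexing a document into such a mapping failed with a
    // general parsing error that mentioned a field called "dummy_field".
    static final String MAPPING = """
        {
          "properties": {
            "suggest": {
              "type": "completion",
              "fields": {
                "suggest_variant_a": { "type": "completion" },
                "suggest_variant_b": { "type": "completion" }
              }
            }
          }
        }
        """;
}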

This commit expands testing for this scenario and extends the parser to support it.

As part of this change, a new parser is created for each sub-field, which makes it possible to expose the same object structure multiple times, for instance when a completion field has more than one completion sub-field.
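
As a rough illustration of the parser-per-sub-field idea (a sketch, not code from this commit; the metadata values and the loop are made up), each sub-field can be handed its own MapXContentParser over the same map, standing in for CompletionInputMetadata#toMap(), so the structure can be walked from its START_OBJECT as many times as there are sub-fields:

import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

class ParserPerSubFieldSketch {
    public static void main(String[] args) throws IOException {
        // Stand-in for CompletionInputMetadata#toMap(): the object structure (input, weight, ...)
        // that every completion sub-field needs to re-read.
        Map<String, Object> metadata = Map.of("input", List.of("nevermind", "nirvana"), "weight", 34);

        // One fresh parser per sub-field: the same structure is exposed again each time.
        for (int subField = 0; subField < 2; subField++) {
            try (
                XContentParser parser = new MapXContentParser(
                    NamedXContentRegistry.EMPTY,
                    DeprecationHandler.IGNORE_DEPRECATIONS,
                    metadata,
                    XContentType.JSON
                )
            ) {
                while (parser.nextToken() != null) {
                    // each sub-field consumes the full object independently
                }
            }
        }
    }
}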

Additionally, the wrapping of both the geo_point multi-field parser and the completion multi-field parser into a dummy_field object is removed, in favour of returning the correct currentName of the main field being parsed.

Finally, getTokenLocation is tweaked to return the location of the field being parsed in the incoming document, so that error messages become clearer when things go wrong.

Closes #83534
javanna committed Feb 11, 2022
1 parent 62943fe commit a91e692
Showing 8 changed files with 381 additions and 93 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/83595.yaml
@@ -0,0 +1,6 @@
pr: 83595
summary: Completion field to support multiple completion multi-fields
area: Mapping
type: bug
issues:
- 83534
MapXContentParser.java
@@ -11,14 +11,12 @@
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentLocation;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -28,23 +26,10 @@
*/
public class MapXContentParser extends AbstractXContentParser {

private XContentType xContentType;
private final XContentType xContentType;
private TokenIterator iterator;
private boolean closed;

public static XContentParser wrapObject(Object sourceMap) throws IOException {
XContentParser parser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
Collections.singletonMap("dummy_field", sourceMap),
XContentType.JSON
);
parser.nextToken(); // start object
parser.nextToken(); // field name
parser.nextToken(); // field value
return parser;
}

public MapXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
AbstractGeometryFieldMapper.java
@@ -13,7 +13,10 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
@@ -53,7 +56,7 @@ public abstract void parse(XContentParser parser, CheckedConsumer<T, IOException
throws IOException;

private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
try (XContentParser parser = MapXContentParser.wrapObject(sourceMap)) {
try (XContentParser parser = wrapObject(sourceMap)) {
parse(parser, v -> consumer.accept(normalizeFromSource(v)), e -> {}); /* ignore malformed */
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -67,6 +70,18 @@ private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
// TODO: move geometry normalization to the geometry parser.
public abstract T normalizeFromSource(T geometry);

private static XContentParser wrapObject(Object sourceMap) throws IOException {
XContentParser parser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
Collections.singletonMap("dummy_field", sourceMap),
XContentType.JSON
);
parser.nextToken(); // start object
parser.nextToken(); // field name
parser.nextToken(); // field value
return parser;
}
}

public abstract static class AbstractGeometryFieldType<T> extends MappedFieldType {
CompletionFieldMapper.java
@@ -30,11 +30,15 @@
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
import org.elasticsearch.search.suggest.completion.context.ContextMappings;
import org.elasticsearch.xcontent.FilterXContentParser;
import org.elasticsearch.xcontent.DelegatingXContentParser;
import org.elasticsearch.xcontent.DeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentLocation;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParser.NumberType;
import org.elasticsearch.xcontent.XContentParser.Token;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
@@ -425,8 +429,11 @@ public void parse(DocumentParserContext context) throws IOException {

context.addToFieldNames(fieldType().name());
for (CompletionInputMetadata metadata : inputMap.values()) {
DocumentParserContext externalValueContext = context.switchParser(new CompletionParser(metadata));
multiFields.parse(this, externalValueContext);
multiFields.parse(
this,
context,
() -> context.switchParser(new MultiFieldParser(metadata, fieldType().name(), context.parser().getTokenLocation()))
);
}
}

@@ -586,28 +593,99 @@ public void doValidate(MappingLookup mappers) {
}
}

private static class CompletionParser extends FilterXContentParser {
/**
* Parser that exposes the expected format depending on the type of multi-field that is consuming content.
* Completion fields can hold multi-fields, which can either parse a simple string value or an object in case of another completion
* field. This parser detects which of the two is parsing content and exposes the full object when needed (including input, weight
* and context if available), otherwise the input value only.
*
* A few assumptions are made that make this work:
* 1) only string values are supported for a completion field, hence only sub-fields that parse strings are supported
* 2) sub-fields that parse simple values only ever call {@link #textOrNull()} to do so. They may call {@link #currentToken()} only to
* check if there's a null value, which is irrelevant in the multi-fields scenario as null values are ignored in the parent field and
* don't lead to any field creation.
* 3) completion is the only sub-field type that may be parsing the object structure.
*
* The parser is set to expose the simple value by default, unless {@link #nextToken()} is called, which signals that the
* consumer supports the object structure.
*/
// This parser changes behaviour depending on which methods are called by consumers, which is extremely delicate. This kind of works for
// our internal mappers, but what about mappers from plugins?
static class MultiFieldParser extends DelegatingXContentParser {
private final String textValue;
private final String fieldName;
private final XContentLocation locationOffset;
private final XContentParser fullObjectParser;
// we assume that the consumer is parsing values, we will switch to exposing the object format if nextToken is called
private boolean parsingObject = false;

MultiFieldParser(CompletionInputMetadata metadata, String fieldName, XContentLocation locationOffset) {
this.fullObjectParser = new MapXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
metadata.toMap(),
XContentType.JSON
);
this.fieldName = fieldName;
this.locationOffset = locationOffset;
this.textValue = metadata.input;
}

boolean advanced = false;
final String textValue;
@Override
protected XContentParser delegate() {
// if consumers are only reading values, they should never go through delegate and rather call the
// overridden currentToken and textOrNull below that don't call super
assert parsingObject;
return fullObjectParser;
}

private CompletionParser(CompletionInputMetadata metadata) throws IOException {
super(MapXContentParser.wrapObject(metadata.toMap()));
this.textValue = metadata.input;
@Override
public Token currentToken() {
if (parsingObject == false) {
// nextToken has not been called, it may or may not be called at a later time.
// What we return does not really matter for mappers that support simple values, as they only check for VALUE_NULL.
// For mappers that do support objects, START_OBJECT is a good choice.
return Token.START_OBJECT;
}
return super.currentToken();
}

@Override
public String textOrNull() throws IOException {
if (advanced == false) {
if (parsingObject == false) {
return textValue;
}
return super.textOrNull();
}

@Override
public Token nextToken() throws IOException {
advanced = true;
if (parsingObject == false) {
// a completion sub-field is parsing
parsingObject = true;
// move to START_OBJECT, currentToken has already returned START_OBJECT and we will advance one token further just below
this.fullObjectParser.nextToken();
}
return super.nextToken();
}

@Override
public String currentName() throws IOException {
if (parsingObject == false) {
return fieldName;
}
String currentName = super.currentName();
if (currentName == null && currentToken() == Token.END_OBJECT) {
return fieldName;
}
return currentName;
}

@Override
public XContentLocation getTokenLocation() {
// return fixed token location: it's not possible to match the token location while parsing through the object structure,
// because completion metadata have been rewritten hence they won't match the incoming document
return locationOffset;
}
}
}
FieldMapper.java
@@ -255,7 +255,7 @@ public void parse(DocumentParserContext context) throws IOException {
valuePreview
);
}
multiFields.parse(this, context);
multiFields.parse(this, context, () -> context);
}

/**
Expand Down Expand Up @@ -449,7 +449,7 @@ public final Map<String, NamedAnalyzer> indexAnalyzers() {
return indexAnalyzers;
}

public static class MultiFields implements Iterable<FieldMapper>, ToXContent {
public static final class MultiFields implements Iterable<FieldMapper>, ToXContent {

private static final MultiFields EMPTY = new MultiFields(Collections.emptyMap());

@@ -507,16 +507,16 @@ private MultiFields(Map<String, FieldMapper> mappers) {
this.mappers = mappers;
}

public void parse(FieldMapper mainField, DocumentParserContext context) throws IOException {
public void parse(FieldMapper mainField, DocumentParserContext context, Supplier<DocumentParserContext> multiFieldContextSupplier)
throws IOException {
// TODO: multi fields are really just copy fields, we just need to expose "sub fields" or something that can be part
// of the mappings
if (mappers.isEmpty()) {
return;
}

context.path().add(mainField.simpleName());
for (FieldMapper mapper : mappers.values()) {
mapper.parse(context);
mapper.parse(multiFieldContextSupplier.get());
}
context.path().remove();
}
GeoPointFieldMapper.java
@@ -42,9 +42,9 @@
import org.elasticsearch.search.lookup.FieldValues;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.runtime.GeoPointScriptFieldDistanceFeatureQuery;
import org.elasticsearch.xcontent.FilterXContentParser;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.support.MapXContentParser;

import java.io.IOException;
import java.io.UncheckedIOException;
@@ -215,7 +215,38 @@ protected void index(DocumentParserContext context, GeoPoint geometry) throws IO
context.doc().add(new StoredField(fieldType().name(), geometry.toString()));
}
// TODO phase out geohash (which is currently used in the CompletionSuggester)
multiFields.parse(this, context.switchParser(MapXContentParser.wrapObject(geometry.geohash())));
// we only expose the geohash value and disallow advancing tokens, hence we can reuse the same parser throughout multiple sub-fields
DocumentParserContext parserContext = context.switchParser(new GeoHashMultiFieldParser(context.parser(), geometry.geohash()));
multiFields.parse(this, context, () -> parserContext);
}

/**
* Parser that pretends to be the main document parser, but exposes the provided geohash regardless of how the geopoint was provided
* in the incoming document. We rely on the fact that consumers only ever call {@link XContentParser#textOrNull()} and never
* advance tokens, which is explicitly disallowed by this parser.
*/
static class GeoHashMultiFieldParser extends FilterXContentParser {
private final String value;

GeoHashMultiFieldParser(XContentParser innerParser, String value) {
super(innerParser);
this.value = value;
}

@Override
public String textOrNull() throws IOException {
return value;
}

@Override
public Token currentToken() {
return Token.VALUE_STRING;
}

@Override
public Token nextToken() throws IOException {
throw new UnsupportedOperationException();
}
}

@Override
Expand Down
