Search - return ignored field values from fields api. (#78697)
Since Kibana's Discover switched to retrieving values via the fields API rather than from source, there have been gaps in the display caused by "ignored" fields (those that fall foul of the ignore_above and ignore_malformed size and formatting rules).

This PR returns ignored values from source when a user-requested field fails to be parsed for a document. In these cases, the corresponding hit includes a new ignored_field_values section in the response.

Closes #74121
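
The model behind this is a third, ignored-values list on DocumentField, surfaced through getIgnoredValues() (both added in this commit, see the diff below). A minimal, hypothetical sketch of how the two lists map onto the response sections; the class name and example values here are made up, only the DocumentField calls come from this change:

[source,java]
----
import java.util.List;

import org.elasticsearch.common.document.DocumentField;

public class IgnoredValuesSketch {
    public static void main(String[] args) {
        // "ok" was indexed normally; "bad" was dropped at index time,
        // for example because it exceeded an ignore_above limit.
        DocumentField field = new DocumentField("my-small", List.of("ok"), List.of("bad"));

        System.out.println(field.getValues());        // [ok]  -> serialized under "fields"
        System.out.println(field.getIgnoredValues()); // [bad] -> serialized under "ignored_field_values"
    }
}
----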
markharwood committed Oct 13, 2021
1 parent 5ca66d0 commit 228992b
Showing 31 changed files with 264 additions and 69 deletions.
@@ -376,6 +376,96 @@ won't be included in the response because `include_unmapped` isn't set to
// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/]
// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/]

[discrete]
[[ignored-field-values]]
==== Ignored field values
The `fields` option only returns values that were valid when indexed.
If your search request asks for values from a field that ignored certain values
because they were malformed or too large, these values are returned
separately in an `ignored_field_values` section.

In this example we index a document with a value that is ignored and
not added to the index, so it is shown separately in the search results:

[source,console]
----
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "my-small" : { "type" : "keyword", "ignore_above": 2 }, <1>
      "my-large" : { "type" : "keyword" }
    }
  }
}

PUT my-index-000001/_doc/1?refresh=true
{
  "my-small": ["ok", "bad"], <2>
  "my-large": "ok content"
}

POST my-index-000001/_search
{
  "fields": ["my-*"],
  "_source": false
}
----

<1> This field has a size restriction.
<2> This document field has a value that exceeds the size restriction, so it is ignored and not indexed.

The response will contain the ignored field values under the `ignored_field_values` path.
These values are retrieved from the document's original JSON source and are returned raw,
so they are not formatted or processed in any way, unlike the successfully indexed fields, which are
returned in the `fields` section.

[source,console-result]
----
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "my-index-000001",
        "_id" : "1",
        "_score" : 1.0,
        "_ignored" : [ "my-small"],
        "fields" : {
          "my-large": [
            "ok content"
          ],
          "my-small": [
            "ok"
          ]
        },
        "ignored_field_values" : {
          "my-small": [
            "bad"
          ]
        }
      }
    ]
  }
}
----
// TESTRESPONSE[s/"took" : 2/"took": $body.took/]
// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/]
// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/]


[discrete]
[[source-filtering]]
=== The `_source` option
@@ -48,6 +48,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -178,7 +179,7 @@ private Function<LeafReaderContext,CheckedIntFunction<List<Object>, IOException>
return docID -> {
try {
sourceLookup.setSegmentAndDocument(context, docID);
return valueFetcher.fetchValues(sourceLookup);
return valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@@ -109,7 +109,7 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
if (hasDocValues() == false) {
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this));
}
@@ -76,7 +76,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
// Although this is an internal field, we return it in the list of all field types. So we
// provide an empty value fetcher here instead of throwing an error.
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}

@Override
@@ -57,7 +57,7 @@ private static class SizeFieldType extends NumberFieldType {
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
if (hasDocValues() == false) {
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this));
}
@@ -88,3 +88,20 @@ setup:

- length: { hits.hits: 1 }
- is_true: hits.hits.0._ignored


---
"ignored_field_values is returned by default":
- skip:
version: " - 7.99.99"
reason: ignored_field_values was added in 8.0

- do:
search:
rest_total_hits_as_int: true
body: { query: { ids: { "values": [ "3" ] } },"_source":false, "fields":["my*"]}

- length: { hits.hits: 1 }
- length: { hits.hits.0._ignored: 2}
- length: { hits.hits.0.ignored_field_values.my_date: 1}
- length: { hits.hits.0.ignored_field_values.my_ip: 1}
@@ -8,6 +8,7 @@

package org.elasticsearch.common.document;

import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
@@ -19,6 +20,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
@@ -32,21 +34,33 @@
* @see SearchHit
* @see GetResult
*/
public class DocumentField implements Writeable, ToXContentFragment, Iterable<Object> {
public class DocumentField implements Writeable, Iterable<Object> {

private final String name;
private final List<Object> values;

private List<Object> ignoredValues;

public DocumentField(StreamInput in) throws IOException {
name = in.readString();
values = in.readList(StreamInput::readGenericValue);
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
ignoredValues = in.readList(StreamInput::readGenericValue);
} else {
ignoredValues = Collections.emptyList();
}
}

public DocumentField(String name, List<Object> values) {
this(name, values, Collections.emptyList());
}

public DocumentField(String name, List<Object> values, List<Object> ignoredValues) {
this.name = Objects.requireNonNull(name, "name must not be null");
this.values = Objects.requireNonNull(values, "values must not be null");
this.ignoredValues = Objects.requireNonNull(ignoredValues, "ignoredValues must not be null");
}


/**
* The name of the field.
*/
@@ -76,25 +90,55 @@ public List<Object> getValues() {
public Iterator<Object> iterator() {
return values.iterator();
}

/**
* The field's ignored values as an immutable list.
*/
public List<Object> getIgnoredValues() {
return ignoredValues == Collections.emptyList() ? ignoredValues : Collections.unmodifiableList(ignoredValues);
}

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(name);
        out.writeCollection(values, StreamOutput::writeGenericValue);
        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
            out.writeCollection(ignoredValues, StreamOutput::writeGenericValue);
        }
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startArray(name);
        for (Object value : values) {
            // This call doesn't really need to support writing any kind of object, since the values
            // here are always serializable to xContent. Each value could be a leaf type like a string,
            // number, or boolean, a list of such values, or a map of such values with string keys.
            builder.value(value);
        }
        builder.endArray();
        return builder;
    }

    public ToXContentFragment getValidValuesWriter() {
        return new ToXContentFragment() {
            @Override
            public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
                builder.startArray(name);
                for (Object value : values) {
                    // This call doesn't really need to support writing any kind of object, since the values
                    // here are always serializable to xContent. Each value could be a leaf type like a string,
                    // number, or boolean, a list of such values, or a map of such values with string keys.
                    builder.value(value);
                }
                builder.endArray();
                return builder;
            }
        };
    }

    public ToXContentFragment getIgnoredValuesWriter() {
        return new ToXContentFragment() {
            @Override
            public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
                builder.startArray(name);
                for (Object value : ignoredValues) {
                    builder.value(value);
                }
                builder.endArray();
                return builder;
            }
        };
    }

public static DocumentField fromXContent(XContentParser parser) throws IOException {
ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser);
@@ -117,19 +161,22 @@ public boolean equals(Object o) {
return false;
}
DocumentField objects = (DocumentField) o;
return Objects.equals(name, objects.name) && Objects.equals(values, objects.values);
return Objects.equals(name, objects.name) && Objects.equals(values, objects.values) &&
Objects.equals(ignoredValues, objects.ignoredValues);
}

@Override
public int hashCode() {
return Objects.hash(name, values);
return Objects.hash(name, values, ignoredValues);
}

@Override
public String toString() {
return "DocumentField{" +
"name='" + name + '\'' +
", values=" + values +
", ignoredValues=" + ignoredValues +
'}';
}

}
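
Both the `DocumentField` constructor and `writeTo` above guard the new list with a `Version.V_8_0_0` check. A small sketch of what that means for a round trip over the transport protocol, assuming the usual `BytesStreamOutput`/`StreamInput` test pattern; the field name and values are made up:

[source,java]
----
import java.util.List;

import org.elasticsearch.Version;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;

public class DocumentFieldWireSketch {
    public static void main(String[] args) throws Exception {
        DocumentField field = new DocumentField("my-small", List.of("ok"), List.of("bad"));

        try (BytesStreamOutput out = new BytesStreamOutput()) {
            // An 8.0+ stream carries the ignored values alongside the indexed ones.
            out.setVersion(Version.V_8_0_0);
            field.writeTo(out);
            try (StreamInput in = out.bytes().streamInput()) {
                in.setVersion(Version.V_8_0_0);
                DocumentField copy = new DocumentField(in);
                System.out.println(copy.getIgnoredValues()); // [bad]
            }
        }
        // On a stream before 8.0 the ignored values are neither written nor read,
        // and the deserialized copy falls back to an empty list.
    }
}
----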
@@ -275,7 +275,7 @@ public XContentBuilder toXContentEmbedded(XContentBuilder builder, Params params
if (documentFields.isEmpty() == false) {
builder.startObject(FIELDS);
for (DocumentField field : documentFields.values()) {
field.toXContent(builder, params);
field.getValidValuesWriter().toXContent(builder, params);
}
builder.endObject();
}
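
This hunk shows only the existing `fields` block switching to `getValidValuesWriter()`. As a rough sketch (not the verbatim upstream change), the counterpart block for the new section could sit right after it in the same method, reusing the surrounding `builder`, `documentFields` and `params`; the literal key and the emptiness checks are assumptions:

[source,java]
----
// Assumed counterpart to the FIELDS block above: emit the new section only when
// at least one field actually collected ignored values.
boolean anyIgnored = documentFields.values().stream().anyMatch(f -> f.getIgnoredValues().isEmpty() == false);
if (anyIgnored) {
    builder.startObject("ignored_field_values");
    for (DocumentField field : documentFields.values()) {
        if (field.getIgnoredValues().isEmpty() == false) {
            field.getIgnoredValuesWriter().toXContent(builder, params);
        }
    }
    builder.endObject();
}
----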
@@ -43,7 +43,7 @@ public ArraySourceValueFetcher(String fieldName, SearchExecutionContext context,
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
List<Object> values = new ArrayList<>();
for (String path : sourcePaths) {
Object sourceValue = lookup.extractValue(path, nullValue);
@@ -55,7 +55,7 @@ public List<Object> fetchValues(SourceLookup lookup) {
} catch (Exception e) {
// if parsing fails here then it would have failed at index time
// as well, meaning that we must be ignoring malformed values.
// So ignore it here too.
ignoredValues.add(sourceValue);
}
}
return values;
@@ -39,7 +39,7 @@ public void setNextReader(LeafReaderContext context) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException {
if (false == formattedDocValues.advanceExact(lookup.docId())) {
return emptyList();
}
@@ -39,7 +39,7 @@ public NestedValueFetcher(String nestedField, FieldFetcher nestedFieldFetcher) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> includedValues) throws IOException {
List<Object> nestedEntriesToReturn = new ArrayList<>();
Map<String, Object> filteredSource = new HashMap<>();
Map<String, Object> stub = createSourceMapStub(filteredSource);
@@ -51,7 +51,7 @@ public SourceValueFetcher(Set<String> sourcePaths, Object nullValue) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
List<Object> values = new ArrayList<>();
for (String path : sourcePaths) {
Object sourceValue = lookup.extractValue(path, nullValue);
@@ -76,8 +76,11 @@ public List<Object> fetchValues(SourceLookup lookup) {
Object parsedValue = parseSourceValue(value);
if (parsedValue != null) {
values.add(parsedValue);
} else {
ignoredValues.add(value);
}
} catch (Exception e) {
ignoredValues.add(value);
// if we get a parsing exception here, that means that the
// value in _source would have also caused a parsing
// exception at index time and the value ignored.
@@ -36,7 +36,7 @@ public void setNextReader(LeafReaderContext context) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException {
leafSearchLookup.setDocument(lookup.docId());
return leafSearchLookup.fields().get(fieldname).getValues();
}
@@ -31,9 +31,10 @@ public interface ValueFetcher {
* should not be relied on.
*
* @param lookup a lookup structure over the document's source.
* @param ignoredValues a mutable list to collect any ignored values as they were originally presented in source
* @return a list of standardized field values.
*/
List<Object> fetchValues(SourceLookup lookup) throws IOException;
List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException;

/**
* Update the leaf reader used to fetch values.
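
To make the new contract concrete, here is a minimal, hypothetical fetcher written against the signature above. Only the `ValueFetcher` interface, the `SourceLookup.extractValue` call (used by the fetchers earlier in this commit), and the general pattern come from the diff; the class itself and its integer parsing are made-up illustration:

[source,java]
----
import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.search.lookup.SourceLookup;

/**
 * Hypothetical fetcher: values that parse are returned as usual, while values
 * that would have been ignored at index time are handed back through the
 * caller-supplied list, exactly as they appear in _source.
 */
public class IntegerishValueFetcher implements ValueFetcher {

    private final String path;

    public IntegerishValueFetcher(String path) {
        this.path = path;
    }

    @Override
    public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
        Object sourceValue = lookup.extractValue(path, null);
        if (sourceValue == null) {
            return List.of();
        }
        List<Object> values = new ArrayList<>();
        try {
            values.add(Integer.parseInt(sourceValue.toString()));
        } catch (NumberFormatException e) {
            // mirrors ignore_malformed: report the raw source value so the fetch
            // phase can surface it under ignored_field_values
            ignoredValues.add(sourceValue);
        }
        return values;
    }
}
----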
