Search - return ignored field values from fields api. (#78697)
Since Kibana's Discover switched to retrieving values via the fields API rather than from source, there have been gaps in the display caused by "ignored" fields (those that fall foul of the ignore_above and ignore_malformed size and formatting rules).

This PR returns ignored values from source when a user-requested field fails to be parsed for a document. In these cases, the corresponding hit includes a new ignored_field_values section in the response.

Closes #74121
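
The model behind this is a third, ignored-values list on DocumentField, surfaced through getIgnoredValues() (both added in this commit, see the diff below). A minimal, hypothetical sketch of how the two lists map onto the response sections; the class name and example values here are made up, only the DocumentField calls come from this change:

[source,java]
----
import java.util.List;

import org.elasticsearch.common.document.DocumentField;

public class IgnoredValuesSketch {
    public static void main(String[] args) {
        // "ok" was indexed normally; "bad" was dropped at index time,
        // for example because it exceeded an ignore_above limit.
        DocumentField field = new DocumentField("my-small", List.of("ok"), List.of("bad"));

        System.out.println(field.getValues());        // [ok]  -> serialized under "fields"
        System.out.println(field.getIgnoredValues()); // [bad] -> serialized under "ignored_field_values"
    }
}
----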
markharwood committed Oct 13, 2021
1 parent 5ca66d0 commit 228992b
Showing 31 changed files with 264 additions and 69 deletions.
@@ -376,6 +376,96 @@ won't be included in the response because `include_unmapped` isn't set to
// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/]
// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/]

[discrete]
[[ignored-field-values]]
==== Ignored field values
The `fields` option only returns values that were valid when indexed.
If your search request asks for values from a field that ignored certain values
because they were malformed or too large, these values are returned
separately in an `ignored_field_values` section.

In this example we index a document with a value that is ignored and
not added to the index, so it is shown separately in the search results:

[source,console]
----
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "my-small" : { "type" : "keyword", "ignore_above": 2 }, <1>
      "my-large" : { "type" : "keyword" }
    }
  }
}

PUT my-index-000001/_doc/1?refresh=true
{
  "my-small": ["ok", "bad"], <2>
  "my-large": "ok content"
}

POST my-index-000001/_search
{
  "fields": ["my-*"],
  "_source": false
}
----

<1> This field has a size restriction.
<2> This document field has a value that exceeds the size restriction, so it is ignored and not indexed.

The response will contain the ignored field values under the `ignored_field_values` path.
These values are retrieved from the document's original JSON source and are returned raw,
so they are not formatted or processed in any way, unlike the successfully indexed fields, which are
returned in the `fields` section.

[source,console-result]
----
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "my-index-000001",
        "_id" : "1",
        "_score" : 1.0,
        "_ignored" : [ "my-small"],
        "fields" : {
          "my-large": [
            "ok content"
          ],
          "my-small": [
            "ok"
          ]
        },
        "ignored_field_values" : {
          "my-small": [
            "bad"
          ]
        }
      }
    ]
  }
}
----
// TESTRESPONSE[s/"took" : 2/"took": $body.took/]
// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/]
// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/]


[discrete]
[[source-filtering]]
=== The `_source` option
@@ -48,6 +48,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -178,7 +179,7 @@ private Function<LeafReaderContext,CheckedIntFunction<List<Object>, IOException>
return docID -> {
try {
sourceLookup.setSegmentAndDocument(context, docID);
return valueFetcher.fetchValues(sourceLookup);
return valueFetcher.fetchValues(sourceLookup, new ArrayList<>());
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@@ -109,7 +109,7 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
if (hasDocValues() == false) {
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this));
}
@@ -76,7 +76,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
// Although this is an internal field, we return it in the list of all field types. So we
// provide an empty value fetcher here instead of throwing an error.
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}

@Override
@@ -57,7 +57,7 @@ private static class SizeFieldType extends NumberFieldType {
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
if (hasDocValues() == false) {
return lookup -> List.of();
return (lookup, ignoredValues) -> List.of();
}
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this));
}
@@ -88,3 +88,20 @@ setup:

- length: { hits.hits: 1 }
- is_true: hits.hits.0._ignored


---
"ignored_field_values is returned by default":
- skip:
version: " - 7.99.99"
reason: ignored_field_values was added in 8.0

- do:
search:
rest_total_hits_as_int: true
body: { query: { ids: { "values": [ "3" ] } },"_source":false, "fields":["my*"]}

- length: { hits.hits: 1 }
- length: { hits.hits.0._ignored: 2}
- length: { hits.hits.0.ignored_field_values.my_date: 1}
- length: { hits.hits.0.ignored_field_values.my_ip: 1}
@@ -8,6 +8,7 @@

package org.elasticsearch.common.document;

import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
@@ -19,6 +20,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
@@ -32,21 +34,33 @@
* @see SearchHit
* @see GetResult
*/
public class DocumentField implements Writeable, ToXContentFragment, Iterable<Object> {
public class DocumentField implements Writeable, Iterable<Object> {

private final String name;
private final List<Object> values;

private List<Object> ignoredValues;

public DocumentField(StreamInput in) throws IOException {
name = in.readString();
values = in.readList(StreamInput::readGenericValue);
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
ignoredValues = in.readList(StreamInput::readGenericValue);
} else {
ignoredValues = Collections.emptyList();
}
}

public DocumentField(String name, List<Object> values) {
this(name, values, Collections.emptyList());
}

public DocumentField(String name, List<Object> values, List<Object> ignoredValues) {
this.name = Objects.requireNonNull(name, "name must not be null");
this.values = Objects.requireNonNull(values, "values must not be null");
this.ignoredValues = Objects.requireNonNull(ignoredValues, "ignoredValues must not be null");
}


/**
* The name of the field.
*/
@@ -76,25 +90,55 @@ public List<Object> getValues() {
public Iterator<Object> iterator() {
return values.iterator();
}

/**
* The field's ignored values as an immutable list.
*/
public List<Object> getIgnoredValues() {
return ignoredValues == Collections.emptyList() ? ignoredValues : Collections.unmodifiableList(ignoredValues);
}

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(name);
        out.writeCollection(values, StreamOutput::writeGenericValue);
        if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
            out.writeCollection(ignoredValues, StreamOutput::writeGenericValue);
        }
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startArray(name);
        for (Object value : values) {
            // This call doesn't really need to support writing any kind of object, since the values
            // here are always serializable to xContent. Each value could be a leaf type like a string,
            // number, or boolean, a list of such values, or a map of such values with string keys.
            builder.value(value);
        }
        builder.endArray();
        return builder;
    }

    public ToXContentFragment getValidValuesWriter() {
        return new ToXContentFragment() {
            @Override
            public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
                builder.startArray(name);
                for (Object value : values) {
                    // This call doesn't really need to support writing any kind of object, since the values
                    // here are always serializable to xContent. Each value could be a leaf type like a string,
                    // number, or boolean, a list of such values, or a map of such values with string keys.
                    builder.value(value);
                }
                builder.endArray();
                return builder;
            }
        };
    }

    public ToXContentFragment getIgnoredValuesWriter() {
        return new ToXContentFragment() {
            @Override
            public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
                builder.startArray(name);
                for (Object value : ignoredValues) {
                    builder.value(value);
                }
                builder.endArray();
                return builder;
            }
        };
    }

public static DocumentField fromXContent(XContentParser parser) throws IOException {
ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser);
@@ -117,19 +161,22 @@ public boolean equals(Object o) {
return false;
}
DocumentField objects = (DocumentField) o;
return Objects.equals(name, objects.name) && Objects.equals(values, objects.values);
return Objects.equals(name, objects.name) && Objects.equals(values, objects.values) &&
Objects.equals(ignoredValues, objects.ignoredValues);
}

@Override
public int hashCode() {
return Objects.hash(name, values);
return Objects.hash(name, values, ignoredValues);
}

@Override
public String toString() {
return "DocumentField{" +
"name='" + name + '\'' +
", values=" + values +
", ignoredValues=" + ignoredValues +
'}';
}

}
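
Both the `DocumentField` constructor and `writeTo` above guard the new list with a `Version.V_8_0_0` check. A small sketch of what that means for a round trip over the transport protocol, assuming the usual `BytesStreamOutput`/`StreamInput` test pattern; the field name and values are made up:

[source,java]
----
import java.util.List;

import org.elasticsearch.Version;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;

public class DocumentFieldWireSketch {
    public static void main(String[] args) throws Exception {
        DocumentField field = new DocumentField("my-small", List.of("ok"), List.of("bad"));

        try (BytesStreamOutput out = new BytesStreamOutput()) {
            // An 8.0+ stream carries the ignored values alongside the indexed ones.
            out.setVersion(Version.V_8_0_0);
            field.writeTo(out);
            try (StreamInput in = out.bytes().streamInput()) {
                in.setVersion(Version.V_8_0_0);
                DocumentField copy = new DocumentField(in);
                System.out.println(copy.getIgnoredValues()); // [bad]
            }
        }
        // On a stream before 8.0 the ignored values are neither written nor read,
        // and the deserialized copy falls back to an empty list.
    }
}
----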
@@ -275,7 +275,7 @@ public XContentBuilder toXContentEmbedded(XContentBuilder builder, Params params
if (documentFields.isEmpty() == false) {
builder.startObject(FIELDS);
for (DocumentField field : documentFields.values()) {
field.toXContent(builder, params);
field.getValidValuesWriter().toXContent(builder, params);
}
builder.endObject();
}
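
This hunk shows only the existing `fields` block switching to `getValidValuesWriter()`. As a rough sketch (not the verbatim upstream change), the counterpart block for the new section could sit right after it in the same method, reusing the surrounding `builder`, `documentFields` and `params`; the literal key and the emptiness checks are assumptions:

[source,java]
----
// Assumed counterpart to the FIELDS block above: emit the new section only when
// at least one field actually collected ignored values.
boolean anyIgnored = documentFields.values().stream().anyMatch(f -> f.getIgnoredValues().isEmpty() == false);
if (anyIgnored) {
    builder.startObject("ignored_field_values");
    for (DocumentField field : documentFields.values()) {
        if (field.getIgnoredValues().isEmpty() == false) {
            field.getIgnoredValuesWriter().toXContent(builder, params);
        }
    }
    builder.endObject();
}
----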
@@ -43,7 +43,7 @@ public ArraySourceValueFetcher(String fieldName, SearchExecutionContext context,
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
List<Object> values = new ArrayList<>();
for (String path : sourcePaths) {
Object sourceValue = lookup.extractValue(path, nullValue);
@@ -55,7 +55,7 @@ public List<Object> fetchValues(SourceLookup lookup) {
} catch (Exception e) {
// if parsing fails here then it would have failed at index time
// as well, meaning that we must be ignoring malformed values.
// So ignore it here too.
ignoredValues.add(sourceValue);
}
}
return values;
@@ -39,7 +39,7 @@ public void setNextReader(LeafReaderContext context) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException {
if (false == formattedDocValues.advanceExact(lookup.docId())) {
return emptyList();
}
@@ -39,7 +39,7 @@ public NestedValueFetcher(String nestedField, FieldFetcher nestedFieldFetcher) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> includedValues) throws IOException {
List<Object> nestedEntriesToReturn = new ArrayList<>();
Map<String, Object> filteredSource = new HashMap<>();
Map<String, Object> stub = createSourceMapStub(filteredSource);
@@ -51,7 +51,7 @@ public SourceValueFetcher(Set<String> sourcePaths, Object nullValue) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
List<Object> values = new ArrayList<>();
for (String path : sourcePaths) {
Object sourceValue = lookup.extractValue(path, nullValue);
@@ -76,8 +76,11 @@ public List<Object> fetchValues(SourceLookup lookup) {
Object parsedValue = parseSourceValue(value);
if (parsedValue != null) {
values.add(parsedValue);
} else {
ignoredValues.add(value);
}
} catch (Exception e) {
ignoredValues.add(value);
// if we get a parsing exception here, that means that the
// value in _source would have also caused a parsing
// exception at index time and the value ignored.
@@ -36,7 +36,7 @@ public void setNextReader(LeafReaderContext context) {
}

@Override
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException {
leafSearchLookup.setDocument(lookup.docId());
return leafSearchLookup.fields().get(fieldname).getValues();
}
@@ -31,9 +31,10 @@ public interface ValueFetcher {
* should not be relied on.
*
* @param lookup a lookup structure over the document's source.
* @param ignoredValues a mutable list to collect any ignored values as they were originally presented in source
* @return a list of standardized field values.
*/
List<Object> fetchValues(SourceLookup lookup) throws IOException;
List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) throws IOException;

/**
* Update the leaf reader used to fetch values.
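
To make the new contract concrete, here is a minimal, hypothetical fetcher written against the signature above. Only the `ValueFetcher` interface, the `SourceLookup.extractValue` call (used by the fetchers earlier in this commit), and the general pattern come from the diff; the class itself and its integer parsing are made-up illustration:

[source,java]
----
import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.search.lookup.SourceLookup;

/**
 * Hypothetical fetcher: values that parse are returned as usual, while values
 * that would have been ignored at index time are handed back through the
 * caller-supplied list, exactly as they appear in _source.
 */
public class IntegerishValueFetcher implements ValueFetcher {

    private final String path;

    public IntegerishValueFetcher(String path) {
        this.path = path;
    }

    @Override
    public List<Object> fetchValues(SourceLookup lookup, List<Object> ignoredValues) {
        Object sourceValue = lookup.extractValue(path, null);
        if (sourceValue == null) {
            return List.of();
        }
        List<Object> values = new ArrayList<>();
        try {
            values.add(Integer.parseInt(sourceValue.toString()));
        } catch (NumberFormatException e) {
            // mirrors ignore_malformed: report the raw source value so the fetch
            // phase can surface it under ignored_field_values
            ignoredValues.add(sourceValue);
        }
        return values;
    }
}
----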
