Skip to content

Commit

Permalink
[7.17] [ML] Allow retrieving boolean fields from _source in DFA jobs (
Browse files Browse the repository at this point in the history
#85672) (#85723)

This commit fixes a bug where `boolean` mapped fields may only
be retrieved from doc values in Data Frame Analytics jobs.

While this problem may occur for `boolean` fields with disabled
doc values, a more common way of being affected by this bug is
running an analysis against data with a very high number of fields.
If there are more fields than the `index.max_docvalue_fields_search`
setting, we fall back to retrieving the fields from _source. Then
`boolean` mapped fields would fail with the error message "cannot
apply boolean mapping to field [<field>]".
  • Loading branch information
dimitris-athanasiou committed Apr 6, 2022
1 parent a86e504 commit c7f08ed
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 11 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/85672.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 85672
summary: Allow retrieving `boolean` fields from `_source` in DFA jobs
area: Machine Learning
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.core.Booleans;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.xpack.core.ml.utils.MlStrings;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -235,39 +236,70 @@ private static boolean isNestedOrObject(Map<String, FieldCapabilities> fieldCaps
/**
* Makes boolean fields behave as a field of different type.
*/
private static final class BooleanMapper<T> extends DocValueField {
private static final class BooleanMapper<T> extends AbstractField {

private static final Set<String> TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);

private final ExtractedField field;
private final T trueValue;
private final T falseValue;

BooleanMapper(ExtractedField field, T trueValue, T falseValue) {
super(field.getName(), TYPES);
if (field.getMethod() != Method.DOC_VALUE || field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) {
this.field = field;
if (field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) {
throw new IllegalArgumentException("cannot apply boolean mapping to field [" + field.getName() + "]");
}
this.trueValue = trueValue;
this.falseValue = falseValue;
}

@Override
public Method getMethod() {
return field.getMethod();
}

@Override
public Object[] value(SearchHit hit) {
DocumentField keyValue = hit.field(getName());
if (keyValue != null) {
return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray();
Object[] value = field.value(hit);
if (value != null) {
return Arrays.stream(value).map(v -> {
boolean asBoolean;
if (v instanceof Boolean) {
asBoolean = (boolean) v;
} else {
asBoolean = Booleans.parseBoolean(v.toString());
}
return asBoolean ? trueValue : falseValue;
}).toArray();
}
return new Object[0];
}

@Override
public boolean supportsFromSource() {
return false;
return field.supportsFromSource();
}

@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
return field.newFromSource();
}

@Override
public boolean isMultiField() {
return field.isMultiField();
}

@Override
public String getParentField() {
return field.getParentField();
}

@Override
public String getDocValueFormat() {
return field.getDocValueFormat();
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ public void testBuildGivenMultiFields() {
assertThat(airportField.getParentField(), equalTo("airport"));
}

public void testApplyBooleanMapping() {
public void testApplyBooleanMapping_GivenDocValueField() {
DocValueField aBool = new DocValueField("a_bool", Collections.singleton("boolean"));

ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool);
Expand All @@ -132,8 +132,35 @@ public void testApplyBooleanMapping() {

assertThat(mapped.getName(), equalTo(aBool.getName()));
assertThat(mapped.getMethod(), equalTo(aBool.getMethod()));
assertThat(mapped.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, mapped::newFromSource);
assertThat(mapped.supportsFromSource(), is(aBool.supportsFromSource()));
}

public void testApplyBooleanMapping_GivenSourceField() {
SourceField aBool = new SourceField("a_bool", Collections.singleton("boolean"));

ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool);

SearchHit hitTrue = new SearchHitBuilder(42).setSource("{\"a_bool\": true}").build();
SearchHit hitFalse = new SearchHitBuilder(42).setSource("{\"a_bool\": false}").build();
SearchHit hitTrueArray = new SearchHitBuilder(42).setSource("{\"a_bool\": [\"true\", true]}").build();
SearchHit hitFalseArray = new SearchHitBuilder(42).setSource("{\"a_bool\": [\"false\", false]}").build();

assertThat(mapped.value(hitTrue), equalTo(new Integer[] { 1 }));
assertThat(mapped.value(hitFalse), equalTo(new Integer[] { 0 }));
assertThat(mapped.value(hitTrueArray), equalTo(new Integer[] { 1, 1 }));
assertThat(mapped.value(hitFalseArray), equalTo(new Integer[] { 0, 0 }));

assertThat(mapped.getName(), equalTo(aBool.getName()));
assertThat(mapped.getMethod(), equalTo(aBool.getMethod()));
assertThat(mapped.supportsFromSource(), is(aBool.supportsFromSource()));
}

public void testApplyBooleanMapping_GivenNonBooleanField() {
SourceField aBool = new SourceField("not_a_bool", Collections.singleton("integer"));

IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ExtractedFields.applyBooleanMapping(aBool));

assertThat(e.getMessage(), equalTo("cannot apply boolean mapping to field [not_a_bool]"));
}

public void testBuildGivenFieldWithoutMappings() {
Expand Down

0 comments on commit c7f08ed

Please sign in to comment.