Skip to content

Commit

Permalink
[7.10] [ML] Extract dependent variable's mapping correctly in case of…
Browse files Browse the repository at this point in the history
… a multi-field (#63813) (#64287)
  • Loading branch information
przemekwitek committed Nov 16, 2020
1 parent 9551cb3 commit de668ab
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 92 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class FieldCapabilitiesResponse extends ActionResponse implements ToXCont
private final Map<String, Map<String, FieldCapabilities>> responseMap;
private final List<FieldCapabilitiesIndexResponse> indexResponses;

FieldCapabilitiesResponse(String[] indices, Map<String, Map<String, FieldCapabilities>> responseMap) {
public FieldCapabilitiesResponse(String[] indices, Map<String, Map<String, FieldCapabilities>> responseMap) {
this(indices, responseMap, Collections.emptyList());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
package org.elasticsearch.xpack.core.ml.dataframe.analyses;

import org.elasticsearch.Version;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Randomness;
Expand All @@ -14,7 +16,6 @@
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.FieldAliasMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.ObjectMapper;
Expand All @@ -41,7 +42,6 @@

import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;

public class Classification implements DataFrameAnalysis {

Expand Down Expand Up @@ -376,28 +376,22 @@ public List<FieldCardinalityConstraint> getFieldCardinalityConstraints() {

@SuppressWarnings("unchecked")
@Override
public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName) {
public Map<String, Object> getExplicitlyMappedFields(String resultsFieldName, FieldCapabilitiesResponse fieldCapabilitiesResponse) {
Map<String, Object> additionalProperties = new HashMap<>();
additionalProperties.put(resultsFieldName + ".feature_importance", FEATURE_IMPORTANCE_MAPPING);
Object dependentVariableMapping = extractMapping(dependentVariable, mappingsProperties);
if ((dependentVariableMapping instanceof Map) == false) {
if (fieldCapabilitiesResponse == null) {
return additionalProperties;
}
Map<String, Object> dependentVariableMappingAsMap = (Map) dependentVariableMapping;
// If the source field is an alias, fetch the concrete field that the alias points to.
if (FieldAliasMapper.CONTENT_TYPE.equals(dependentVariableMappingAsMap.get("type"))) {
String path = (String) dependentVariableMappingAsMap.get(FieldAliasMapper.Names.PATH);
dependentVariableMapping = extractMapping(path, mappingsProperties);
}
// We may have updated the value of {@code dependentVariableMapping} in the "if" block above.
// Hence, we need to check the "instanceof" condition again.
if ((dependentVariableMapping instanceof Map) == false) {
Map<String, FieldCapabilities> dependentVariableFieldCaps = fieldCapabilitiesResponse.getField(dependentVariable);
if (dependentVariableFieldCaps == null || dependentVariableFieldCaps.isEmpty()) {
return additionalProperties;
}
additionalProperties.put(resultsFieldName + "." + predictionFieldName, dependentVariableMapping);
Object dependentVariableMappingType = dependentVariableFieldCaps.values().iterator().next().getType();
additionalProperties.put(
resultsFieldName + "." + predictionFieldName, Collections.singletonMap("type", dependentVariableMappingType));

Map<String, Object> topClassesProperties = new HashMap<>();
topClassesProperties.put("class_name", dependentVariableMapping);
topClassesProperties.put("class_name", Collections.singletonMap("type", dependentVariableMappingType));
topClassesProperties.put("class_probability", Collections.singletonMap("type", NumberFieldMapper.NumberType.DOUBLE.typeName()));

Map<String, Object> topClassesMapping = new HashMap<>();
Expand All @@ -408,10 +402,6 @@ public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mapping
return additionalProperties;
}

private static Object extractMapping(String path, Map<String, Object> mappingsProperties) {
return extractValue(String.join(".properties.", path.split("\\.")), mappingsProperties);
}

@Override
public boolean supportsMissingValues() {
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.core.ml.dataframe.analyses;

import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.xcontent.ToXContentObject;
Expand Down Expand Up @@ -46,11 +47,11 @@ public interface DataFrameAnalysis extends ToXContentObject, NamedWriteable {
/**
* Returns fields for which the mappings should be either predefined or copied from source index to destination index.
*
* @param mappingsProperties mappings.properties portion of the index mappings
* @param resultsFieldName name of the results field under which all the results are stored
* @param fieldCapabilitiesResponse field capabilities fetched for this analysis' required fields
* @return {@link Map} containing fields for which the mappings should be handled explicitly
*/
Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName);
Map<String, Object> getExplicitlyMappedFields(String resultsFieldName, FieldCapabilitiesResponse fieldCapabilitiesResponse);

/**
* @return {@code true} if this analysis supports data frame rows with missing values
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.elasticsearch.xpack.core.ml.dataframe.analyses;

import org.elasticsearch.Version;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
Expand Down Expand Up @@ -245,7 +246,7 @@ public List<FieldCardinalityConstraint> getFieldCardinalityConstraints() {
}

@Override
public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName) {
public Map<String, Object> getExplicitlyMappedFields(String resultsFieldName, FieldCapabilitiesResponse fieldCapabilitiesResponse) {
Map<String, Object> additionalProperties = new HashMap<>();
additionalProperties.put(resultsFieldName + ".outlier_score",
Collections.singletonMap("type", NumberFieldMapper.NumberType.DOUBLE.typeName()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.elasticsearch.xpack.core.ml.dataframe.analyses;

import org.elasticsearch.Version;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Randomness;
Expand Down Expand Up @@ -298,7 +299,7 @@ public List<FieldCardinalityConstraint> getFieldCardinalityConstraints() {
}

@Override
public Map<String, Object> getExplicitlyMappedFields(Map<String, Object> mappingsProperties, String resultsFieldName) {
public Map<String, Object> getExplicitlyMappedFields(String resultsFieldName, FieldCapabilitiesResponse fieldCapabilitiesResponse) {
Map<String, Object> additionalProperties = new HashMap<>();
additionalProperties.put(resultsFieldName + ".feature_importance", FEATURE_IMPORTANCE_MAPPING);
// Prediction field should be always mapped as "double" rather than "float" in order to increase precision in case of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.Version;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
Expand Down Expand Up @@ -44,6 +46,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Collections.singletonMap;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -361,64 +364,51 @@ public void testFieldCardinalityLimitsIsNonEmpty() {
assertThat(constraints.get(0).getUpperBound(), equalTo(30L));
}

public void testGetExplicitlyMappedFields() {
assertThat(new Classification("foo").getExplicitlyMappedFields(null, "results"),
equalTo(Collections.singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
assertThat(new Classification("foo").getExplicitlyMappedFields(Collections.emptyMap(), "results"),
equalTo(Collections.singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
assertThat(
new Classification("foo").getExplicitlyMappedFields(Collections.singletonMap("foo", "not_a_map"), "results"),
equalTo(Collections.singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
Map<String, Object> expectedTopClassesMapping = new HashMap<String, Object>() {{
put("type", "nested");
put("properties", new HashMap<String, Object>() {{
put("class_name", Collections.singletonMap("bar", "baz"));
put("class_probability", Collections.singletonMap("type", "double"));
}});
}};
Map<String, Object> explicitlyMappedFields = new Classification("foo").getExplicitlyMappedFields(
Collections.singletonMap("foo", Collections.singletonMap("bar", "baz")),
"results");
assertThat(explicitlyMappedFields, hasEntry("results.foo_prediction", Collections.singletonMap("bar", "baz")));
assertThat(explicitlyMappedFields, hasEntry("results.top_classes", expectedTopClassesMapping));
assertThat(explicitlyMappedFields, hasEntry("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING));
public void testGetExplicitlyMappedFields_FieldCapabilitiesResponseIsNull() {
Map<String, Object> explicitlyMappedFields = new Classification("foo").getExplicitlyMappedFields("results", null);
assertThat(explicitlyMappedFields, equalTo(singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
}

expectedTopClassesMapping = new HashMap<String, Object>() {{
public void testGetExplicitlyMappedFields_DependentVariableMappingIsAbsent() {
FieldCapabilitiesResponse fieldCapabilitiesResponse = new FieldCapabilitiesResponse(new String[0], Collections.emptyMap());
Map<String, Object> explicitlyMappedFields =
new Classification("foo").getExplicitlyMappedFields("results", fieldCapabilitiesResponse);
assertThat(explicitlyMappedFields, equalTo(singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
}

public void testGetExplicitlyMappedFields_DependentVariableMappingHasNoTypes() {
FieldCapabilitiesResponse fieldCapabilitiesResponse =
new FieldCapabilitiesResponse(new String[0], Collections.singletonMap("foo", Collections.emptyMap()));
Map<String, Object> explicitlyMappedFields =
new Classification("foo").getExplicitlyMappedFields("results", fieldCapabilitiesResponse);
assertThat(explicitlyMappedFields, equalTo(singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
}

public void testGetExplicitlyMappedFields_DependentVariableMappingIsPresent() {
Map<String, Object> expectedTopClassesMapping = new HashMap<String, Object>() {{
put("type", "nested");
put("properties", new HashMap<String, Object>() {{
put("class_name", Collections.singletonMap("type", "long"));
put("class_probability", Collections.singletonMap("type", "double"));
put("class_name", singletonMap("type", "dummy"));
put("class_probability", singletonMap("type", "double"));
}});
}};
explicitlyMappedFields = new Classification("foo").getExplicitlyMappedFields(
new HashMap<String, Object>() {{
put("foo", new HashMap<String, String>() {{
put("type", "alias");
put("path", "bar");
}});
put("bar", Collections.singletonMap("type", "long"));
}},
"results");
assertThat(explicitlyMappedFields, hasEntry("results.foo_prediction", Collections.singletonMap("type", "long")));
FieldCapabilitiesResponse fieldCapabilitiesResponse =
new FieldCapabilitiesResponse(
new String[0],
Collections.singletonMap("foo", Collections.singletonMap("dummy", createFieldCapabilities("foo", "dummy"))));
Map<String, Object> explicitlyMappedFields =
new Classification("foo").getExplicitlyMappedFields("results", fieldCapabilitiesResponse);
assertThat(explicitlyMappedFields, hasEntry("results.foo_prediction", singletonMap("type", "dummy")));
assertThat(explicitlyMappedFields, hasEntry("results.top_classes", expectedTopClassesMapping));
assertThat(explicitlyMappedFields, hasEntry("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING));

assertThat(
new Classification("foo").getExplicitlyMappedFields(
Collections.singletonMap("foo", new HashMap<String, String>() {{
put("type", "alias");
put("path", "missing");
}}),
"results"),
equalTo(Collections.singletonMap("results.feature_importance", Classification.FEATURE_IMPORTANCE_MAPPING)));
}

public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException {
Classification classification = createRandom();
assertThat(classification.getRandomizeSeed(), is(notNullValue()));

try (XContentBuilder builder = JsonXContent.contentBuilder()) {
classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", "7.5.0")));
classification.toXContent(builder, new ToXContent.MapParams(singletonMap("version", "7.5.0")));
String json = Strings.toString(builder);
assertThat(json, not(containsString("randomize_seed")));
}
Expand All @@ -429,7 +419,7 @@ public void testToXContent_GivenVersionAfterRandomizeSeedWasIntroduced() throws
assertThat(classification.getRandomizeSeed(), is(notNullValue()));

try (XContentBuilder builder = JsonXContent.contentBuilder()) {
classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", Version.CURRENT.toString())));
classification.toXContent(builder, new ToXContent.MapParams(singletonMap("version", Version.CURRENT.toString())));
String json = Strings.toString(builder);
assertThat(json, containsString("randomize_seed"));
}
Expand All @@ -440,7 +430,7 @@ public void testToXContent_GivenVersionIsNull() throws IOException {
assertThat(classification.getRandomizeSeed(), is(notNullValue()));

try (XContentBuilder builder = JsonXContent.contentBuilder()) {
classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", null)));
classification.toXContent(builder, new ToXContent.MapParams(singletonMap("version", null)));
String json = Strings.toString(builder);
assertThat(json, containsString("randomize_seed"));
}
Expand Down Expand Up @@ -519,4 +509,8 @@ public Long getCardinality(String field) {
return fieldCardinalities.get(field);
}
}

private static FieldCapabilities createFieldCapabilities(String field, String type) {
return new FieldCapabilities(field, type, true, true, null, null, null, Collections.emptyMap());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public void testFieldCardinalityLimitsIsEmpty() {
}

public void testGetExplicitlyMappedFields() {
Map<String, Object> mappedFields = createTestInstance().getExplicitlyMappedFields(null, "test");
Map<String, Object> mappedFields = createTestInstance().getExplicitlyMappedFields("test", null);
assertThat(mappedFields.size(), equalTo(2));
assertThat(mappedFields, hasKey("test.outlier_score"));
assertThat(mappedFields.get("test.outlier_score"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ public void testFieldCardinalityLimitsIsEmpty() {
}

public void testGetExplicitlyMappedFields() {
Map<String, Object> explicitlyMappedFields = new Regression("foo").getExplicitlyMappedFields(null, "results");
Map<String, Object> explicitlyMappedFields = new Regression("foo").getExplicitlyMappedFields("results", null);
assertThat(explicitlyMappedFields, hasEntry("results.foo_prediction", Collections.singletonMap("type", "double")));
assertThat(explicitlyMappedFields, hasEntry("results.feature_importance", Regression.FEATURE_IMPORTANCE_MAPPING));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,11 @@ public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty_DependentVariableI
assertThat(e.getMessage(), startsWith("field [text-field] of type [text] is non-aggregatable"));
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/60759" )
public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty_DependentVariableIsTextAndKeyword() throws Exception {
testWithOnlyTrainingRowsAndTrainingPercentIsFifty(
"classification_training_percent_is_50_text_and_keyword", TEXT_FIELD + ".keyword", KEYWORD_FIELD_VALUES, "keyword");
}

public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty_DependentVariableIsBoolean() throws Exception {
testWithOnlyTrainingRowsAndTrainingPercentIsFifty(
"classification_training_percent_is_50_boolean", BOOLEAN_FIELD, BOOLEAN_FIELD_VALUES, "boolean");
Expand Down Expand Up @@ -852,7 +856,12 @@ private static void createIndex(String index, boolean isDatastream) {
" \"type\": \"integer\"\n" +
" }," +
" \""+ TEXT_FIELD + "\": {\n" +
" \"type\": \"text\"\n" +
" \"type\": \"text\",\n" +
" \"fields\": {" +
" \"keyword\": {" +
" \"type\": \"keyword\"\n" +
" }" +
" }" +
" }," +
" \""+ KEYWORD_FIELD + "\": {\n" +
" \"type\": \"keyword\"\n" +
Expand Down

0 comments on commit de668ab

Please sign in to comment.