Skip to content

Commit

Permalink
SQL: Perform lazy evaluation of mismatched mappings
Browse files Browse the repository at this point in the history
Move away from performing eager, fail-fast validation of mismatched
 mapping to a lazy evaluation based on the fields actually used in the
 query. This allows queries to run on the parts of the indices that
 "work" which is not just convenient but also a necessity for large
 mappings (like logging) where alignment is hard/impossible to achieve.

Fix #35659
  • Loading branch information
costin committed Nov 18, 2018
1 parent 37ae0c8 commit 2a9f8c7
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypeConversion;
import org.elasticsearch.xpack.sql.type.DataTypes;
import org.elasticsearch.xpack.sql.type.InvalidMappedField;
import org.elasticsearch.xpack.sql.type.UnsupportedEsField;

import java.util.ArrayList;
Expand Down Expand Up @@ -200,8 +201,14 @@ private static Attribute handleSpecialFields(UnresolvedAttribute u, Attribute na
// if it's a object/compound type, keep it unresolved with a nice error message
if (named instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) named;

// incompatible mappings
if (fa.field() instanceof InvalidMappedField) {
named = u.withUnresolvedMessage("Cannot use field [" + fa.name() + "] due to ambiguities being "
+ ((InvalidMappedField) fa.field()).errorMessage());
}
// unsupported types
if (DataTypes.isUnsupported(fa.dataType())) {
else if (DataTypes.isUnsupported(fa.dataType())) {
UnsupportedEsField unsupportedField = (UnsupportedEsField) fa.field();
named = u.withUnresolvedMessage(
"Cannot use field [" + fa.name() + "] type [" + unsupportedField.getOriginalType() + "] as is unsupported");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DateEsField;
import org.elasticsearch.xpack.sql.type.EsField;
import org.elasticsearch.xpack.sql.type.InvalidMappedField;
import org.elasticsearch.xpack.sql.type.KeywordEsField;
import org.elasticsearch.xpack.sql.type.TextEsField;
import org.elasticsearch.xpack.sql.type.Types;
Expand All @@ -51,6 +52,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.regex.Pattern;

import static java.util.Collections.emptyMap;
Expand Down Expand Up @@ -263,13 +265,21 @@ static IndexResolution mergedMapping(String indexPattern, Map<String, Map<String
// without sorting, they can still be detected however without the emptyMap optimization
// (fields without multi-fields have no children)
for (Entry<String, Map<String, FieldCapabilities>> entry : sortedFields) {

InvalidMappedField invalidField = null;
FieldCapabilities fieldCap = null;
errorMessage.setLength(0);

String name = entry.getKey();

// skip internal fields
if (!name.startsWith("_")) {
Map<String, FieldCapabilities> types = entry.getValue();
// field is mapped differently across indices
if (types.size() > 1) {
// build error message
// build the error message
// and create a MultiTypeField

for (Entry<String, FieldCapabilities> type : types.entrySet()) {
if (errorMessage.length() > 0) {
errorMessage.append(", ");
Expand All @@ -280,44 +290,79 @@ static IndexResolution mergedMapping(String indexPattern, Map<String, Map<String
errorMessage.append(Arrays.toString(type.getValue().indices()));
}

errorMessage.insert(0,
"[" + indexPattern + "] points to indices with incompatible mappings; " +
"field [" + name + "] is mapped in [" + types.size() + "] different ways: ");
}
if (errorMessage.length() > 0) {
return IndexResolution.invalid(errorMessage.toString());
}

FieldCapabilities fieldCap = types.values().iterator().next();
// validate search/agg-able
if (fieldCap.isAggregatable() && fieldCap.nonAggregatableIndices() != null) {
errorMessage.append("[" + indexPattern + "] points to indices with incompatible mappings: ");
errorMessage.append("field [" + name + "] is aggregateable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonAggregatableIndices()));
errorMessage.insert(0, "mapped as [" + types.size() + "] incompatible types: ");

invalidField = new InvalidMappedField(name, errorMessage.toString());
}
if (fieldCap.isSearchable() && fieldCap.nonSearchableIndices() != null) {
// type is okay, check aggregation
else {
fieldCap = types.values().iterator().next();
// validate search/agg-able
if (fieldCap.isAggregatable() && fieldCap.nonAggregatableIndices() != null) {
errorMessage.append("mapped as aggregatable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonAggregatableIndices()));
}
if (fieldCap.isSearchable() && fieldCap.nonSearchableIndices() != null) {
if (errorMessage.length() > 0) {
errorMessage.append(",");
}
errorMessage.append("mapped as searchable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonSearchableIndices()));
}

if (errorMessage.length() > 0) {
errorMessage.append(",");
invalidField = new InvalidMappedField(name, errorMessage.toString());
}
errorMessage.append("[" + indexPattern + "] points to indices with incompatible mappings: ");
errorMessage.append("field [" + name + "] is searchable except in ");
errorMessage.append(Arrays.toString(fieldCap.nonSearchableIndices()));
}
if (errorMessage.length() > 0) {
return IndexResolution.invalid(errorMessage.toString());
}

// validation passes - create the field
// and name wasn't added before
// if the name wasn't added before
final InvalidMappedField invalidF = invalidField;
final FieldCapabilities fieldCapab = fieldCap;
if (!flattedMapping.containsKey(name)) {
createField(name, fieldCap, fieldCaps, hierarchicalMapping, flattedMapping, false);
createField(name, fieldCaps, hierarchicalMapping, flattedMapping, s -> {
return invalidF != null ? invalidF : createField(s, fieldCapab.getType(), emptyMap(), fieldCapab.isAggregatable());
});
}
}
}

return IndexResolution.valid(new EsIndex(indexPattern, hierarchicalMapping));
}

private static EsField createField(String fieldName, Map<String, Map<String, FieldCapabilities>> globalCaps,
Map<String, EsField> hierarchicalMapping, Map<String, EsField> flattedMapping,
Function<String, EsField> field) {

Map<String, EsField> parentProps = hierarchicalMapping;

int dot = fieldName.lastIndexOf('.');
String fullFieldName = fieldName;

if (dot >= 0) {
String parentName = fieldName.substring(0, dot);
fieldName = fieldName.substring(dot + 1);
EsField parent = flattedMapping.get(parentName);
if (parent == null) {
Map<String, FieldCapabilities> map = globalCaps.get(parentName);
if (map == null) {
throw new SqlIllegalArgumentException("Cannot find field {}; this is likely a bug", parentName);
}
FieldCapabilities parentCap = map.values().iterator().next();
parent = createField(parentName, globalCaps, hierarchicalMapping, flattedMapping,
s -> createField(s, parentCap.getType(), new TreeMap<>(), parentCap.isAggregatable()));
}
parentProps = parent.getProperties();
}

EsField esField = field.apply(fieldName);

parentProps.put(fieldName, esField);
flattedMapping.put(fullFieldName, esField);

return esField;
}

private static EsField createField(String fieldName, FieldCapabilities caps, Map<String, Map<String, FieldCapabilities>> globalCaps,
Map<String, EsField> hierarchicalMapping, Map<String, EsField> flattedMapping, boolean hasChildren) {

Expand All @@ -341,34 +386,32 @@ private static EsField createField(String fieldName, FieldCapabilities caps, Map
parentProps = parent.getProperties();
}

EsField field = null;
Map<String, EsField> props = hasChildren ? new TreeMap<>() : emptyMap();
EsField field = createField(fieldName, caps.getType(), props, caps.isAggregatable());

DataType esType = DataType.fromEsType(caps.getType());
parentProps.put(fieldName, field);
flattedMapping.put(fullFieldName, field);

return field;
}

private static EsField createField(String fieldName, String typeName, Map<String, EsField> props, boolean isAggregateable) {
DataType esType = DataType.fromEsType(typeName);
switch (esType) {
case TEXT:
field = new TextEsField(fieldName, props, false);
break;
return new TextEsField(fieldName, props, false);
case KEYWORD:
int length = DataType.KEYWORD.defaultPrecision;
// TODO: to check whether isSearchable/isAggregateable takes into account the presence of the normalizer
boolean normalized = false;
field = new KeywordEsField(fieldName, props, caps.isAggregatable(), length, normalized);
break;
return new KeywordEsField(fieldName, props, isAggregateable, length, normalized);
case DATE:
field = new DateEsField(fieldName, props, caps.isAggregatable());
break;
return new DateEsField(fieldName, props, isAggregateable);
case UNSUPPORTED:
field = new UnsupportedEsField(fieldName, caps.getType());
break;
return new UnsupportedEsField(fieldName, typeName);
default:
field = new EsField(fieldName, esType, props, caps.isAggregatable());
return new EsField(fieldName, esType, props, isAggregateable);
}

parentProps.put(fieldName, field);
flattedMapping.put(fullFieldName, field);

return field;
}

private static FieldCapabilitiesRequest createFieldCapsRequest(String index) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.sql.type;

import org.elasticsearch.xpack.sql.analysis.index.MappingException;

import java.util.Objects;

import static java.util.Collections.emptyMap;

/**
* Representation of field mapped differently across indices.
* Used during mapping discovery only.
*/
public class InvalidMappedField extends EsField {

private final String errorMessage;

public InvalidMappedField(String name, String errorMessage) {
super(name, DataType.UNSUPPORTED, emptyMap(), false);
this.errorMessage = errorMessage;
}

public String errorMessage() {
return errorMessage;
}

@Override
public int hashCode() {
return Objects.hash(super.hashCode(), errorMessage);
}

@Override
public boolean equals(Object obj) {
if (super.equals(obj)) {
InvalidMappedField other = (InvalidMappedField) obj;
return Objects.equals(errorMessage, other.errorMessage);
}

return false;
}

@Override
public EsField getExactField() {
throw new MappingException("Field [" + getName() + "] is invalid, cannot access it");

}

@Override
public boolean isExact() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.elasticsearch.xpack.sql.analysis.AnalysisException;
import org.elasticsearch.xpack.sql.analysis.index.EsIndex;
import org.elasticsearch.xpack.sql.analysis.index.IndexResolution;
import org.elasticsearch.xpack.sql.analysis.index.IndexResolverTests;
import org.elasticsearch.xpack.sql.expression.function.FunctionRegistry;
import org.elasticsearch.xpack.sql.parser.SqlParser;
import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan;
Expand Down Expand Up @@ -46,6 +47,25 @@ private LogicalPlan accept(IndexResolution resolution, String sql) {
return analyzer.analyze(parser.createStatement(sql), true);
}

private IndexResolution incompatible() {
Map<String, EsField> basicMapping = TypesTests.loadMapping("mapping-basic.json", true);
Map<String, EsField> incompatible = TypesTests.loadMapping("mapping-basic-incompatible.json");

assertNotEquals(basicMapping, incompatible);
IndexResolution resolution = IndexResolverTests.merge(new EsIndex("basic", basicMapping),
new EsIndex("incompatible", incompatible));
assertTrue(resolution.isValid());
return resolution;
}

private String incompatibleError(String sql) {
return error(incompatible(), sql);
}

private LogicalPlan incompatibleAccept(String sql) {
return accept(incompatible(), sql);
}

public void testMissingIndex() {
assertEquals("1:17: Unknown index [missing]", error(IndexResolution.notFound("missing"), "SELECT foo FROM missing"));
}
Expand Down Expand Up @@ -366,4 +386,40 @@ public void testInvalidTypeForFunction_WithFourArgs() {
assertEquals("1:8: [INSERT] fourth argument must be [string], found value [3] type [integer]",
error("SELECT INSERT('text', 1, 2, 3)"));
}

public void testAllowCorrectFieldsInIncompatibleMappings() {
assertNotNull(incompatibleAccept("SELECT languages FROM \"*\""));
}

public void testWildcardInIncompatibleMappings() {
assertNotNull(incompatibleAccept("SELECT * FROM \"*\""));
}

public void testMismatchedFieldInIncompatibleMappings() {
assertEquals(
"1:8: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT emp_no FROM \"*\""));
}

public void testMismatchedFieldStarInIncompatibleMappings() {
assertEquals(
"1:8: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT emp_no.* FROM \"*\""));
}

public void testMismatchedFieldFilterInIncompatibleMappings() {
assertEquals(
"1:33: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT languages FROM \"*\" WHERE emp_no > 1"));
}

public void testMismatchedFieldScalarInIncompatibleMappings() {
assertEquals(
"1:45: Cannot use field [emp_no] due to ambiguities being mapped as [2] incompatible types: "
+ "[integer] in [basic], [long] in [incompatible]",
incompatibleError("SELECT languages FROM \"*\" ORDER BY SIGN(ABS(emp_no))"));
}
}

0 comments on commit 2a9f8c7

Please sign in to comment.