Skip to content

Commit

Permalink
SQL: Enhance checks for inexact fields (#39427)
Browse files Browse the repository at this point in the history
For functions: move checks for `text` fields without underlying `keyword`
fields or with many of them (ambiguity) to the type resolution stage.

For Order By/Group By: move checks to the `Verifier` to catch early
before `QueryTranslator` or execution.

Closes: #38501
Fixes: #35203
  • Loading branch information
matriv committed Mar 1, 2019
1 parent 015d386 commit 2c39bb3
Show file tree
Hide file tree
Showing 44 changed files with 509 additions and 282 deletions.
22 changes: 11 additions & 11 deletions x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -2338,25 +2338,25 @@ Arumugam
////////////
limitationSubSelect
// tag::limitationSubSelect
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%';
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%' ORDER BY 1;

first_name | last_name
---------------+---------------
Anneke |Preusig
Alejandro |McAlpine
Anoosh |Peyn
Arumugam |Ossenbruggen
Alejandro |McAlpine
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
// end::limitationSubSelect
;

limitationSubSelect
limitationSubSelectRewritten
// tag::limitationSubSelectRewritten
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%';
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%' ORDER BY 1;
// end::limitationSubSelectRewritten
first_name | last_name
---------------+---------------
Anneke |Preusig
Alejandro |McAlpine
Anoosh |Peyn
Arumugam |Ossenbruggen
Alejandro |McAlpine
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
;
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.xpack.sql.stats.Metrics;
import org.elasticsearch.xpack.sql.tree.Node;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.EsField;
import org.elasticsearch.xpack.sql.util.StringUtils;

import java.util.ArrayList;
Expand Down Expand Up @@ -294,7 +295,8 @@ Collection<Failure> verify(LogicalPlan plan) {
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
return checkGroupByAgg(p, localFailures, resolvedFunctions)
return checkGroupByInexactField(p, localFailures)
&& checkGroupByAgg(p, localFailures, resolvedFunctions)
&& checkGroupByOrder(p, localFailures, groupingFailures)
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
}
Expand Down Expand Up @@ -463,6 +465,21 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Set<Expressio
return false;
}

private static boolean checkGroupByInexactField(LogicalPlan p, Set<Failure> localFailures) {
if (p instanceof Aggregate) {
Aggregate a = (Aggregate) p;

// The grouping can not be an aggregate function or an inexact field (e.g. text without a keyword)
a.groupings().forEach(e -> e.forEachUp(c -> {
EsField.Exact exact = c.getExactInfo();
if (exact.hasExact() == false) {
localFailures.add(fail(c, "Field of data type [" + c.dataType().typeName + "] cannot be used for grouping; " +
exact.errorMsg()));
}
}, FieldAttribute.class));
}
return true;
}

// check whether plain columns specified in an agg are mentioned in the group-by
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures, Map<String, Function> functions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

public abstract class SourceGenerator {

private SourceGenerator() {}

private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);

public static SearchSourceBuilder sourceBuilder(QueryContainer container, QueryBuilder filter, Integer size) {
Expand Down Expand Up @@ -107,8 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source

// sorting only works on not-analyzed fields - look for a multi-field replacement
if (attr instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) attr;
fa = fa.isInexact() ? fa.exactAttribute() : fa;
FieldAttribute fa = ((FieldAttribute) attr).exactAttribute();

sortBuilder = fieldSort(fa.name())
.missing(as.missing().position())
Expand All @@ -125,7 +126,8 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
if (nestedSort == null) {
fieldSort.setNestedSort(newSort);
} else {
for (; nestedSort.getNestedSort() != null; nestedSort = nestedSort.getNestedSort()) {
while (nestedSort.getNestedSort() != null) {
nestedSort = nestedSort.getNestedSort();
}
nestedSort.setNestedSort(newSort);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,16 @@
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.StringJoiner;
import java.util.function.Predicate;

import static java.util.Collections.emptyList;
import static java.util.Collections.emptyMap;
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;

public final class Expressions {

Expand Down Expand Up @@ -154,55 +148,4 @@ public static List<Pipe> pipe(List<Expression> expressions) {
}
return pipes;
}

public static TypeResolution typeMustBeBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
}

public static TypeResolution typeMustBeInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isInteger, operationName, paramOrd, "integer");
}

public static TypeResolution typeMustBeNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isNumeric, operationName, paramOrd, "numeric");
}

public static TypeResolution typeMustBeString(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
}

public static TypeResolution typeMustBeDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
}

public static TypeResolution typeMustBeNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
}

public static TypeResolution typeMustBe(Expression e,
Predicate<DataType> predicate,
String operationName,
ParamOrdinal paramOrd,
String... acceptedTypes) {
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
TypeResolution.TYPE_RESOLVED :
new TypeResolution(format(null, "[{}]{} argument must be [{}], found value [{}] type [{}]",
operationName,
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : " " + paramOrd.name().toLowerCase(Locale.ROOT),
acceptedTypesForErrorMsg(acceptedTypes),
Expressions.name(e),
e.dataType().typeName));
}

private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
StringJoiner sj = new StringJoiner(", ");
for (int i = 0; i < acceptedTypes.length - 1; i++) {
sj.add(acceptedTypes[i]);
}
if (acceptedTypes.length > 1) {
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
} else {
return acceptedTypes[0];
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,13 @@ public FieldAttribute nestedParent() {
return nestedParent;
}

public boolean isInexact() {
return field.isExact() == false;
public EsField.Exact getExactInfo() {
return field.getExactInfo();
}

public FieldAttribute exactAttribute() {
if (field.isExact() == false) {
EsField exactField = field.getExactField();
if (exactField.equals(field) == false) {
return innerField(field.getExactField());
}
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
*/
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;

import java.util.List;
import java.util.Objects;

import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isExact;

public class Order extends Expression {

Expand Down Expand Up @@ -45,6 +46,11 @@ public Nullability nullable() {
return Nullability.FALSE;
}

@Override
protected TypeResolution resolveType() {
return isExact(child, "ORDER BY cannot be applied to field of data type [{}]: {}");
}

@Override
public DataType dataType() {
return child.dataType();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;
import org.elasticsearch.xpack.sql.type.EsField;

import java.util.Locale;
import java.util.StringJoiner;
import java.util.function.Predicate;

import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
import static org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
import static org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
import static org.elasticsearch.xpack.sql.expression.Expressions.name;
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;

public final class TypeResolutions {

private TypeResolutions() {}

public static TypeResolution isBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
}

public static TypeResolution isInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, DataType::isInteger, operationName, paramOrd, "integer");
}

public static TypeResolution isNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, DataType::isNumeric, operationName, paramOrd, "numeric");
}

public static TypeResolution isString(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, DataType::isString, operationName, paramOrd, "string");
}

public static TypeResolution isDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
}

public static TypeResolution isNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return isType(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
}

public static TypeResolution isExact(Expression e, String message) {
if (e instanceof FieldAttribute) {
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
if (exact.hasExact() == false) {
return new TypeResolution(format(null, message, e.dataType().typeName, exact.errorMsg()));
}
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution isExact(Expression e, String operationName, ParamOrdinal paramOrd) {
if (e instanceof FieldAttribute) {
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
if (exact.hasExact() == false) {
return new TypeResolution(format(null, "[{}] cannot operate on {}field of data type [{}]: {}",
operationName,
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ?
"" : paramOrd.name().toLowerCase(Locale.ROOT) + " argument ",
e.dataType().typeName, exact.errorMsg()));
}
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution isStringAndExact(Expression e, String operationName, ParamOrdinal paramOrd) {
TypeResolution resolution = isString(e, operationName, paramOrd);
if (resolution.unresolved()) {
return resolution;
}

return isExact(e, operationName, paramOrd);
}

public static TypeResolution isFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
if (!e.foldable()) {
return new TypeResolution(format(null, "{}argument of [{}] must be a constant, received [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
Expressions.name(e)));
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution isNotFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
if (e.foldable()) {
return new TypeResolution(format(null, "{}argument of [{}] must be a table column, found constant [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
Expressions.name(e)));
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution isType(Expression e,
Predicate<DataType> predicate,
String operationName,
ParamOrdinal paramOrd,
String... acceptedTypes) {
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
TypeResolution.TYPE_RESOLVED :
new TypeResolution(format(null, "{}argument of [{}] must be [{}], found value [{}] type [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
acceptedTypesForErrorMsg(acceptedTypes),
name(e),
e.dataType().typeName));
}

private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
StringJoiner sj = new StringJoiner(", ");
for (int i = 0; i < acceptedTypes.length - 1; i++) {
sj.add(acceptedTypes[i]);
}
if (acceptedTypes.length > 1) {
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
} else {
return acceptedTypes[0];
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.expression.TypeResolutions;
import org.elasticsearch.xpack.sql.expression.function.Function;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggNameInput;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
Expand Down Expand Up @@ -78,8 +80,13 @@ public boolean equals(Object obj) {
&& Objects.equals(other.parameters(), parameters());
}

@Override
protected TypeResolution resolveType() {
return TypeResolutions.isExact(field, sourceText(), Expressions.ParamOrdinal.DEFAULT);
}

@Override
public int hashCode() {
return Objects.hash(field(), parameters());
}
}
}
Loading

0 comments on commit 2c39bb3

Please sign in to comment.