From 4cf0391aab1bc6e6bed990121f0316179b5bbf30 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Sat, 2 Feb 2019 22:05:47 +0200 Subject: [PATCH] SQL: Generate relevant error message when grouping functions are not used in GROUP BY (#38017) * Add checks for Grouping functions restriction to be placed inside GROUP BY * Fixed bug where GROUP BY HISTOGRAM (not using alias) wasn't recognized properly in the Verifier due to functions equality not working correctly. (cherry picked from commit 6968f0925b3304a66516916cf69d4bcb9749ea30) --- .../sql/qa/src/main/resources/agg.csv-spec | 23 ++++++++++++ .../xpack/sql/analysis/analyzer/Verifier.java | 20 +++++++++++ .../function/grouping/GroupingFunction.java | 10 ------ .../function/grouping/Histogram.java | 17 +++++---- .../analyzer/VerifierErrorMessagesTests.java | 35 +++++++++++++++++++ 5 files changed, 89 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec index 7cc8e6a6ef80a..7f039d9c4951e 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec @@ -319,6 +319,29 @@ SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h 0 |10 ; +histogramGroupByWithoutAlias +schema::h:ts|c:l +SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY HISTOGRAM(birth_date, INTERVAL 1 YEAR) ORDER BY h DESC; + + h | c +--------------------+--------------- +1964-02-02T00:00:00Z|5 +1963-02-07T00:00:00Z|7 +1962-02-12T00:00:00Z|6 +1961-02-17T00:00:00Z|8 +1960-02-23T00:00:00Z|7 +1959-02-28T00:00:00Z|9 +1958-03-05T00:00:00Z|6 +1957-03-10T00:00:00Z|6 +1956-03-15T00:00:00Z|4 +1955-03-21T00:00:00Z|4 +1954-03-26T00:00:00Z|7 +1953-03-31T00:00:00Z|10 +1952-04-05T00:00:00Z|10 +1951-04-11T00:00:00Z|1 +null |10 +; + countAll schema::all_names:l|c:l SELECT COUNT(ALL first_name) all_names, COUNT(*) c FROM test_emp; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java index c945e03bc2757..ed9bd1f106830 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; +import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction; @@ -229,6 +230,7 @@ Collection verify(LogicalPlan plan) { validateInExpression(p, localFailures); validateConditional(p, localFailures); + checkGroupingFunctionInGroupBy(p, localFailures); checkFilterOnAggs(p, localFailures); checkFilterOnGrouping(p, localFailures); @@ -586,6 +588,24 @@ private static boolean checkGroupMatch(Expression e, Node source, List localFailures) { + // check if the query has a grouping function (Histogram) but no GROUP BY + if (p instanceof Project) { + Project proj = (Project) p; + proj.projections().forEach(e -> e.forEachDown(f -> + localFailures.add(fail(f, "[{}] needs to be part of the grouping", Expressions.name(f))), GroupingFunction.class)); + } else if (p instanceof Aggregate) { + // if it does have a GROUP BY, check if the groupings contain the grouping functions (Histograms) + Aggregate a = (Aggregate) p; + a.aggregates().forEach(agg -> agg.forEachDown(e -> { + if (a.groupings().size() == 0 + || Expressions.anyMatch(a.groupings(), g -> g instanceof Function && e.functionEquals((Function) g)) == false) { + localFailures.add(fail(e, "[{}] needs to be part of the grouping", Expressions.name(e))); + } + }, GroupingFunction.class)); + } + } private static void checkFilterOnAggs(LogicalPlan p, Set localFailures) { if (p instanceof Filter) { diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/GroupingFunction.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/GroupingFunction.java index 0595e29176a93..b8a6bb4054095 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/GroupingFunction.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/GroupingFunction.java @@ -57,16 +57,6 @@ public GroupingFunctionAttribute toAttribute() { return lazyAttribute; } - @Override - public final GroupingFunction replaceChildren(List newChildren) { - if (newChildren.size() != 1) { - throw new IllegalArgumentException("expected [1] child but received [" + newChildren.size() + "]"); - } - return replaceChild(newChildren.get(0)); - } - - protected abstract GroupingFunction replaceChild(Expression newChild); - @Override protected Pipe makePipe() { // unresolved AggNameInput (should always get replaced by the folder) diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java index 3dd4bdc992cb0..23061bfea1859 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/grouping/Histogram.java @@ -10,12 +10,14 @@ import org.elasticsearch.xpack.sql.expression.Expressions; import org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal; import org.elasticsearch.xpack.sql.expression.Literal; -import org.elasticsearch.xpack.sql.tree.Source; import org.elasticsearch.xpack.sql.tree.NodeInfo; +import org.elasticsearch.xpack.sql.tree.Source; import org.elasticsearch.xpack.sql.type.DataType; import org.elasticsearch.xpack.sql.type.DataTypes; import java.time.ZoneId; +import java.util.Collections; +import java.util.List; import java.util.Objects; public class Histogram extends GroupingFunction { @@ -24,8 +26,8 @@ public class Histogram extends GroupingFunction { private final ZoneId zoneId; public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) { - super(source, field); - this.interval = (Literal) interval; + super(source, field, Collections.singletonList(interval)); + this.interval = Literal.of(interval); this.zoneId = zoneId; } @@ -51,10 +53,13 @@ protected TypeResolution resolveType() { return resolution; } - + @Override - protected GroupingFunction replaceChild(Expression newChild) { - return new Histogram(source(), newChild, interval, zoneId); + public final GroupingFunction replaceChildren(List newChildren) { + if (newChildren.size() != 2) { + throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]"); + } + return new Histogram(source(), newChildren.get(0), newChildren.get(1), zoneId); } @Override diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java index 2fa5946700a79..c603a6b97cae0 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java @@ -539,6 +539,41 @@ public void testAggsInHistogram() { assertEquals("1:47: Cannot use an aggregate [MAX] for grouping", error("SELECT MAX(date) FROM test GROUP BY HISTOGRAM(MAX(int), 1)")); } + + public void testHistogramNotInGrouping() { + assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test")); + } + + public void testHistogramNotInGroupingWithCount() { + assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, COUNT(*) FROM test")); + } + + public void testHistogramNotInGroupingWithMaxFirst() { + assertEquals("1:19: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT MAX(date), HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test")); + } + + public void testHistogramWithoutAliasNotInGrouping() { + assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test")); + } + + public void testTwoHistogramsNotInGrouping() { + assertEquals("1:48: [HISTOGRAM(date, INTERVAL 1 DAY)] needs to be part of the grouping", + error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, HISTOGRAM(date, INTERVAL 1 DAY) FROM test GROUP BY h")); + } + + public void testHistogramNotInGrouping_WithGroupByField() { + assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test GROUP BY date")); + } + + public void testScalarOfHistogramNotInGrouping() { + assertEquals("1:14: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping", + error("SELECT MONTH(HISTOGRAM(date, INTERVAL 1 MONTH)) FROM test")); + } public void testErrorMessageForPercentileWithSecondArgBasedOnAField() { assertEquals("1:8: Second argument of PERCENTILE must be a constant, received [ABS(int)]",