Skip to content

Commit

Permalink
SQL: Fix issues with GROUP BY queries (#41964)
Browse files Browse the repository at this point in the history
Translate to an agg query even if only literals are selected,
so that the correct number of rows is returned (number of buckets).

Fix issue with key only in GROUP BY (not in select) and WHERE clause:
Resolve aggregates and groupings based on the child plan which holds
the info info for all the fields of the underlying table.

Fixes: #41951
Fixes: #41413
(cherry picked from commit 45b8580)
  • Loading branch information
codebird authored and matriv committed Feb 15, 2020
1 parent b729157 commit 5b32d11
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 9 deletions.
36 changes: 36 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.sql-spec
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,26 @@ countDistinctAlias
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
countDistinctAndCountSimpleWithAlias
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
aliasedCountWithFunctionFilterAndGroupBy
SELECT COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 GROUP BY gender ORDER BY gender;
countWithFunctionFilterAndGroupBy
SELECT COUNT(*) FROM test_emp WHERE ABS(salary) > 0 GROUP BY gender ORDER BY gender;
aliasedCountWithMultiFunctionFilterAndGroupBy
SELECT COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 AND YEAR(birth_date) > 1980 GROUP BY gender ORDER BY gender;
countWithMultiFunctionFilterAndGroupBy
SELECT COUNT(*) FROM test_emp WHERE ABS(salary) > 0 AND YEAR(birth_date) > 1980 GROUP BY gender ORDER BY gender;
aliasedCountWithFunctionFilterAndMultiGroupBy
SELECT COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 GROUP BY gender, salary ORDER BY gender;
countWithFunctionFilterAndMultiGroupBy
SELECT COUNT(*) FROM test_emp WHERE ABS(salary) > 0 GROUP BY gender, salary ORDER BY gender;
aliasedCountWithMultiFunctionFilterAndMultiGroupBy
SELECT COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 AND YEAR(birth_date) > 1980 GROUP BY gender, salary ORDER BY gender;
countWithMultiFunctionFilterAndMultiGroupBy
SELECT COUNT(*) FROM test_emp WHERE ABS(salary) > 0 AND YEAR(birth_date) > 1980 GROUP BY gender, salary ORDER BY gender;
aliasedCountLiteralColumnWithFunctionFilterAndMultiGroupBy
SELECT 1, gender as g, COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 GROUP BY g, salary ORDER BY gender;
aliasedCountLiteralColumnWithFunctionFilterAndMultiGroupByWithFunction
SELECT 1, gender as g, COUNT(*) as c FROM test_emp WHERE ABS(salary) > 0 GROUP BY g, YEAR(birth_date) ORDER BY gender, YEAR(birth_date);

aggCountAliasAndWhereClauseMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
Expand Down Expand Up @@ -563,6 +583,22 @@ SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM
// group by with literal
implicitGroupByWithLiteral
SELECT 10, MAX("salary") FROM test_emp;
literalWithGroupBy
SELECT 1 FROM test_emp GROUP BY gender;
literalsWithGroupBy
SELECT 1, 2 FROM test_emp GROUP BY gender;
aliasedLiteralWithGroupBy
SELECT 1 AS s FROM test_emp GROUP BY gender;
aliasedLiteralsWithGroupBy
SELECT 1 AS s, 2 FROM test_emp GROUP BY gender;
literalsWithMultipleGroupBy
SELECT 1, 2 FROM test_emp GROUP BY gender, salary;
divisionLiteralsAdditionWithMultipleGroupBy
SELECT 144 / 12 AS division, 1, 2 AS x, 1 + 2 AS addition FROM test_emp GROUP BY gender, salary;
aliasedLiteralsWithMultipleGroupBy
SELECT 1 as s, 2 FROM test_emp GROUP BY gender, salary;
aliasedLiteralsWithMultipleGroupByWithFunction
SELECT 1 as s, 2 FROM test_emp GROUP BY gender, YEAR(birth_date);
implicitGroupByWithLiterals
SELECT 10, 'foo', MAX("salary"), 20, 'bar' FROM test_emp;
groupByWithLiteral
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ else if (plan instanceof Aggregate) {
List<Expression> groupings = a.groupings();
List<Expression> newGroupings = new ArrayList<>();
AttributeMap<Expression> resolved = Expressions.aliases(a.aggregates());

boolean changed = false;
for (Expression grouping : groupings) {
if (grouping instanceof UnresolvedAttribute) {
Expand Down Expand Up @@ -618,7 +619,7 @@ protected LogicalPlan rule(LogicalPlan plan) {
for (Order or : o.order()) {
maybeResolved.add(or.resolved() ? or : tryResolveExpression(or, child));
}

Stream<Order> referencesStream = maybeResolved.stream()
.filter(Expression::resolved);

Expand All @@ -629,7 +630,7 @@ protected LogicalPlan rule(LogicalPlan plan) {
// a + 1 in SELECT is actually Alias("a + 1", a + 1) and translates to ReferenceAttribute
// in the output. However it won't match the unnamed a + 1 despite being the same expression
// so explicitly compare the source

// if there's a match, remove the item from the reference stream
if (Expressions.hasReferenceAttribute(child.outputSet())) {
final Map<Attribute, Expression> collectRefs = new LinkedHashMap<>();
Expand Down Expand Up @@ -720,6 +721,18 @@ protected LogicalPlan rule(LogicalPlan plan) {
}
}

// Try to resolve aggregates and groupings based on the child plan
if (plan instanceof Aggregate) {
Aggregate a = (Aggregate) plan;
LogicalPlan child = a.child();
List<Expression> newGroupings = new ArrayList<>(a.groupings().size());
a.groupings().forEach(e -> newGroupings.add(tryResolveExpression(e, child)));
List<NamedExpression> newAggregates = new ArrayList<>(a.aggregates().size());
a.aggregates().forEach(e -> newAggregates.add(tryResolveExpression(e, child)));
if (newAggregates.equals(a.aggregates()) == false || newGroupings.equals(a.groupings()) == false) {
return new Aggregate(a.source(), child, newGroupings, newAggregates);
}
}
return plan;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,8 @@ protected LogicalPlan rule(LogicalPlan plan) {

plan.forEachDown(a -> {
List<Object> values = extractConstants(a.aggregates());
if (values.size() == a.aggregates().size() && isNotQueryWithFromClauseAndFilterFoldedToFalse(a)) {
if (values.size() == a.aggregates().size() && a.groupings().isEmpty()
&& isNotQueryWithFromClauseAndFilterFoldedToFalse(a)) {
optimizedPlan.set(new LocalRelation(a.source(), new SingletonExecutable(a.output(), values.toArray())));
}
}, Aggregate.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,20 +393,21 @@ protected PhysicalPlan rule(AggregateExec a) {
}
return a;
}

static EsQueryExec fold(AggregateExec a, EsQueryExec exec) {

QueryContainer queryC = exec.queryContainer();

// track aliases defined in the SELECT and used inside GROUP BY
// SELECT x AS a ... GROUP BY a
Map<Attribute, Expression> aliasMap = new LinkedHashMap<>();
String id = null;
for (NamedExpression ne : a.aggregates()) {
if (ne instanceof Alias) {
aliasMap.put(ne.toAttribute(), ((Alias) ne).child());
}
}

if (aliasMap.isEmpty() == false) {
Map<Attribute, Expression> newAliases = new LinkedHashMap<>(queryC.aliases());
newAliases.putAll(aliasMap);
Expand Down Expand Up @@ -451,7 +452,7 @@ static EsQueryExec fold(AggregateExec a, EsQueryExec exec) {
target = ((Alias) ne).child();
}

String id = Expressions.id(target);
id = Expressions.id(target);

// literal
if (target.foldable()) {
Expand Down Expand Up @@ -587,7 +588,14 @@ else if (target.foldable()) {
}
}
}

// If we're only selecting literals, we have to still execute the aggregation to create
// the correct grouping buckets, in order to return the appropriate number of rows
if (a.aggregates().stream().allMatch(e -> e.anyMatch(Expression::foldable))) {
for (Expression grouping : a.groupings()) {
GroupByKey matchingGroup = groupingContext.groupFor(grouping);
queryC = queryC.addColumn(new GroupByRef(matchingGroup.id(), null, false), id);
}
}
return new EsQueryExec(exec.source(), exec.index(), a.output(), queryC);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import org.elasticsearch.xpack.ql.expression.Expression;
import org.elasticsearch.xpack.ql.expression.FieldAttribute;
import org.elasticsearch.xpack.ql.expression.Literal;
import org.elasticsearch.xpack.ql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.ql.expression.gen.script.ScriptTemplate;
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.GreaterThan;
import org.elasticsearch.xpack.ql.index.EsIndex;
import org.elasticsearch.xpack.ql.index.IndexResolution;
import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
Expand Down Expand Up @@ -130,6 +132,34 @@ public void testTermEqualityAnalyzer() {
assertEquals("value", tq.value());
}

public void testAliasAndGroupByResolution(){
LogicalPlan p = plan("SELECT COUNT(*) AS c FROM test WHERE ABS(int) > 0 GROUP BY int");
assertTrue(p instanceof Aggregate);
Aggregate a = (Aggregate) p;
LogicalPlan pc = ((Aggregate) p).child();
assertTrue(pc instanceof Filter);
Expression condition = ((Filter) pc).condition();
assertEquals("GREATERTHAN", ((GreaterThan) condition).functionName());
List<Expression> groupings = a.groupings();
assertTrue(groupings.get(0).resolved());
assertEquals("c", a.aggregates().get(0).name());
assertEquals("COUNT", ((Count) ((Alias) a.aggregates().get(0)).child()).functionName());
}
public void testLiteralWithGroupBy(){
LogicalPlan p = plan("SELECT 1 as t, 2 FROM test GROUP BY int");
assertTrue(p instanceof Aggregate);
Aggregate a = (Aggregate) p;
List<Expression> groupings = a.groupings();
assertEquals(1, groupings.size());
assertTrue(groupings.get(0).resolved());
assertTrue(groupings.get(0) instanceof FieldAttribute);
assertEquals(2, a.aggregates().size());
assertEquals("t", a.aggregates().get(0).name());
assertTrue(((Alias) a.aggregates().get(0)).child() instanceof Literal);
assertEquals("1", ((Alias) a.aggregates().get(0)).child().toString());
assertEquals("2", ((Alias) a.aggregates().get(1)).child().toString());
}

public void testTermEqualityNotAnalyzed() {
LogicalPlan p = plan("SELECT some.string FROM test WHERE int = 5");
assertTrue(p instanceof Project);
Expand Down

0 comments on commit 5b32d11

Please sign in to comment.