Skip to content

Commit

Permalink
[SPARK-21580][SQL] Integers in aggregation expressions are wrongly ta…
Browse files Browse the repository at this point in the history
…ken as group-by ordinal

## What changes were proposed in this pull request?

create temporary view data as select * from values
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2)
as data(a, b);

`select 3, 4, sum(b) from data group by 1, 2;`
`select 3 as c, 4 as d, sum(b) from data group by c, d;`
When running these two cases, the following exception occurred:
`Error in query: GROUP BY position 4 is not in select list (valid range is [1, 3]); line 1 pos 10`

The cause of this failure:
If an aggregateExpression is integer, after replaced with this aggregateExpression, the
groupExpression still considered as an ordinal.

The solution:
This bug is due to re-entrance of an analyzed plan. We can solve it by using `resolveOperators` in `SubstituteUnresolvedOrdinals`.

## How was this patch tested?
Added unit test case

Author: liuxian <liu.xian3@zte.com.cn>

Closes #18779 from 10110346/groupby.
  • Loading branch information
10110346 authored and gatorsmile committed Aug 5, 2017
1 parent 6cbd18c commit 894d5a4
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] {
case _ => false
}

def apply(plan: LogicalPlan): LogicalPlan = plan transform {
def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) =>
val newOrders = s.order.map {
case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _, _) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ select count(a), a from (select 1 as a) tmp group by 2 having a > 0;
-- mixed cases: group-by ordinals and aliases
select a, a AS k, count(b) from data group by k, 1;

-- turn of group by ordinal
-- turn off group by ordinal
set spark.sql.groupByOrdinal=false;

-- can now group by negative literal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -557,4 +557,20 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
}
assert(e.message.contains("aggregate functions are not allowed in GROUP BY"))
}

test("SPARK-21580 ints in aggregation expressions are taken as group-by ordinal.") {
checkAnswer(
testData2.groupBy(lit(3), lit(4)).agg(lit(6), lit(7), sum("b")),
Seq(Row(3, 4, 6, 7, 9)))
checkAnswer(
testData2.groupBy(lit(3), lit(4)).agg(lit(6), 'b, sum("b")),
Seq(Row(3, 4, 6, 1, 3), Row(3, 4, 6, 2, 6)))

checkAnswer(
spark.sql("SELECT 3, 4, SUM(b) FROM testData2 GROUP BY 1, 2"),
Seq(Row(3, 4, 9)))
checkAnswer(
spark.sql("SELECT 3 AS c, 4 AS d, SUM(b) FROM testData2 GROUP BY c, d"),
Seq(Row(3, 4, 9)))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2023,4 +2023,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df1.join(df2, $"t1.i" === $"t2.i").cache().count() == 1)
}
}

test("order-by ordinal.") {
checkAnswer(
testData2.select(lit(7), 'a, 'b).orderBy(lit(1), lit(2), lit(3)),
Seq(Row(7, 1, 1), Row(7, 1, 2), Row(7, 2, 1), Row(7, 2, 2), Row(7, 3, 1), Row(7, 3, 2)))
}
}

0 comments on commit 894d5a4

Please sign in to comment.