Skip to content

Commit

Permalink
Add more unit test and comments
Browse files Browse the repository at this point in the history
  • Loading branch information
chenghao-intel committed Jul 13, 2015
1 parent c695760 commit 34def69
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -193,28 +193,34 @@ class Analyzer(
}

def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case a if !a.childrenResolved => a // be sure all of the children are resolved.
case a: Cube =>
GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations)
case a: Rollup =>
GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations)
case x: GroupingSets =>
val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)()

// the complex expression (non-attribute expressions) in the GROUP BY keys
val nonAttributeGroupByExpression = new ArrayBuffer[Alias]()
val groupByExprPairs = x.groupByExprs.map(_ match {
case e: NamedExpression => (e, e)
case other => {
val alias = Alias(other, other.toString)()
nonAttributeGroupByExpression += alias
(other, alias.toAttribute)
(other, alias.toAttribute) // (Aliased complex expression, the associated attribute)
}
})

// substitute the complex expression for aggregations.
val aggregation = x.aggregations.map(expr => expr.transformDown {
case e => groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e)
}.asInstanceOf[NamedExpression])

// substitute the group by expressions.
val newGroupByExprs = groupByExprPairs.map(_._2)

// add an additional projection if contains the complex expression in the GROUP BY keys
val child = if (nonAttributeGroupByExpression.length > 0) {
Project(x.child.output ++ nonAttributeGroupByExpression, x.child)
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1 NULL -3 2
1 NULL -1 2
1 NULL 3 2
1 NULL 4 2
1 NULL 5 2
1 NULL 6 2
1 NULL 12 2
1 NULL 14 2
1 NULL 15 2
1 NULL 22 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1 NULL -3 2
1 NULL -1 2
1 NULL 3 2
1 NULL 4 2
1 NULL 5 2
1 NULL 6 2
1 NULL 12 2
1 NULL 14 2
1 NULL 15 2
1 NULL 22 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
500 NULL 0
91 0 1
84 1 1
105 2 1
113 3 1
107 4 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1 0 5 3
1 0 15 3
1 0 25 3
1 0 60 3
1 0 75 3
1 0 80 3
1 0 100 3
1 0 140 3
1 0 145 3
1 0 150 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
1 0 5 3
1 0 15 3
1 0 25 3
1 0 60 3
1 0 75 3
1 0 80 3
1 0 100 3
1 0 140 3
1 0 145 3
1 0 150 3
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,60 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
}
}

// SPARK-8976: ROLLUP over a single computed grouping key (key % 5), selecting the
// count, the grouping expression itself and GROUPING__ID straight from `src`.
// The "#1" suffix distinguishes this case from the other SPARK-8976 Rollup tests in this suite.
createQueryTest("SPARK-8976 Wrong Result for Rollup #1",
"""
SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH ROLLUP
""".stripMargin)

// SPARK-8976: ROLLUP over two computed (non-attribute) grouping keys, key%5 and key-5,
// read directly from `src`. The select list aliases the same expressions as k1/k2 and
// exposes GROUPING__ID as k3. Ordering by every output column before LIMIT 10 is
// presumably there to make the ten compared rows deterministic.
createQueryTest("SPARK-8976 Wrong Result for Rollup #2",
"""
SELECT
count(*) AS cnt,
key % 5 as k1,
key-5 as k2,
GROUPING__ID as k3
FROM src group by key%5, key-5
WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin)

// SPARK-8976: ROLLUP over the computed grouping keys key%5 and key-5, but reading from
// a subquery `t` that projects key, key%2 and key - 5 instead of reading `src` directly.
// The outer query still groups by expressions over `key`, so the grouping expressions
// are complex expressions over the subquery output rather than plain columns.
createQueryTest("SPARK-8976 Wrong Result for Rollup #3",
"""
SELECT
count(*) AS cnt,
key % 5 as k1,
key-5 as k2,
GROUPING__ID as k3
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin)

// SPARK-8976: CUBE over a single computed grouping key (key % 5), selecting the count,
// the grouping expression and GROUPING__ID from `src`. No ORDER BY / LIMIT is applied.
createQueryTest("SPARK-8976 Wrong Result for CUBE #1",
"""
SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH CUBE
""".stripMargin)

// SPARK-8976: CUBE over the computed grouping keys key%5 and key-5, reading from a
// subquery `t` that projects key, key%2 and key - 5. Output is ordered by every selected
// column and limited to 10 rows, presumably to keep the compared rows deterministic.
createQueryTest("SPARK-8976 Wrong Result for CUBE #2",
"""
SELECT
count(*) AS cnt,
key % 5 as k1,
key-5 as k2,
GROUPING__ID as k3
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin)

// SPARK-8976: explicit GROUPING SETS (key%5, key-5) over the same computed grouping keys,
// reading from a subquery `t` that projects key, key%2 and key - 5. Each grouping set
// holds exactly one of the two expressions. Output is ordered by every selected column
// and limited to 10 rows.
createQueryTest("SPARK-8976 Wrong Result for GroupingSet",
"""
SELECT
count(*) AS cnt,
key % 5 as k1,
key-5 as k2,
GROUPING__ID as k3
FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10
""".stripMargin)

createQueryTest("insert table with generator with column name",
"""
| CREATE TABLE gen_tmp (key Int);
Expand Down

0 comments on commit 34def69

Please sign in to comment.