
[SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)

## What changes were proposed in this pull request?

These look to have been added as of 2.0 (see SPARK-12541 and SPARK-12706). I referred to the existing docs and the examples in other API docs.

## How was this patch tested?

Manually built the documentation, ran the examples, and ran `DESCRIBE FUNCTION EXTENDED`.
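
For reference, the manual check can be reproduced in a `spark-sql` shell; each statement below should print the newly added usage, examples, and since metadata (exact output layout omitted here):

```sql
DESCRIBE FUNCTION EXTENDED cube;
DESCRIBE FUNCTION EXTENDED rollup;
DESCRIBE FUNCTION EXTENDED grouping;
DESCRIBE FUNCTION EXTENDED grouping_id;
```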

Closes #24642 from HyukjinKwon/SPARK-27771.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
(cherry picked from commit 2431ab0)
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
HyukjinKwon authored and dongjoon-hyun committed May 20, 2019
1 parent 7bdcc77 commit 89095f67e12f3a33ef57fd75a41351559cf01863
@@ -38,14 +38,65 @@ trait GroupingSet extends Expression with CodegenFallback {
  override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
}

// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """
    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
      so that we can run aggregation on them.
  """,
  examples = """
    Examples:
      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
       NULL 2 1
       NULL NULL 2
       Alice 2 1
       Bob 5 1
       NULL 5 1
       Bob NULL 1
       Alice NULL 1
  """,
  since = "2.0.0")
// scalastyle:on line.size.limit
case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
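
Context for the example output above: `CUBE(name, age)` groups by every subset of the listed columns, which is why the output covers 2^2 = 4 grouping combinations across 7 rows. A minimal sketch of the equivalent `GROUPING SETS` form (standard SQL semantics, not part of this patch):

```sql
-- CUBE(name, age) behaves like these explicit grouping sets:
SELECT name, age, count(*)
FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name)
GROUP BY GROUPING SETS ((name, age), (name), (age), ());
```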

// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """
    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
      so that we can run aggregation on them.
  """,
  examples = """
    Examples:
      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
       NULL NULL 2
       Alice 2 1
       Bob 5 1
       Bob NULL 1
       Alice NULL 1
  """,
  since = "2.0.0")
// scalastyle:on line.size.limit
case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
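
`ROLLUP`, by contrast, only drops columns from the right, producing n+1 hierarchical grouping sets, which is why its example output has 5 rows instead of cube's 7. A sketch of the equivalent form (again not part of this patch):

```sql
-- ROLLUP(name, age) behaves like these prefix grouping sets:
SELECT name, age, count(*)
FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name)
GROUP BY GROUPING SETS ((name, age), (name), ());
```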

/**
* Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
* GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """
    _FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
      not, returns 1 for aggregated or 0 for not aggregated in the result set.
  """,
  examples = """
    Examples:
      > SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
       Alice 0 2
       NULL 1 7
       Bob 0 5
  """,
  since = "2.0.0")
// scalastyle:on line.size.limit
case class Grouping(child: Expression) extends Expression with Unevaluable {
  override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
  override def children: Seq[Expression] = child :: Nil
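
One point the example above doesn't show: `grouping()` is what lets callers tell a subtotal NULL apart from a NULL that was already in the data. A hedged illustration (the extra NULL row is made up for this sketch):

```sql
SELECT name, grouping(name), sum(age)
FROM VALUES (2, 'Alice'), (5, 'Bob'), (7, NULL) people(age, name)
GROUP BY CUBE(name);
-- grouping(name) = 1 only on the aggregated (grand-total) row;
-- the row whose name is literally NULL still has grouping(name) = 0.
```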
@@ -58,6 +109,29 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
*
* If groupByExprs is empty, it means all grouping expressions in GroupingSets.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """
    _FUNC_([col1[, col2 ..]]) - returns the level of grouping, equal to
      `(grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)`
  """,
  examples = """
    Examples:
      > SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
       NULL 2 2 165.0
       Alice 0 2 165.0
       NULL 2 5 180.0
       NULL 3 7 172.5
       Bob 0 5 180.0
       Bob 1 5 180.0
       Alice 1 2 165.0
  """,
  note = """
    Input columns should match the grouping columns exactly, or be empty (which means all
    the grouping columns).
  """,
  since = "2.0.0")
// scalastyle:on line.size.limit
case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
  override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
  override def children: Seq[Expression] = groupByExprs
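
To make the bit formula in the usage string concrete: with n = 2 grouping columns, `grouping_id()` is `grouping(name) * 2 + grouping(height)`, which reproduces the values in the example output above. A worked check:

```sql
-- grouping_id() per grouping combination (n = 2):
--   name kept,       height kept       -> 0*2 + 0 = 0  (Alice/Bob detail rows)
--   name kept,       height aggregated -> 0*2 + 1 = 1
--   name aggregated, height kept       -> 1*2 + 0 = 2
--   both aggregated (grand total)      -> 1*2 + 1 = 3
SELECT name, grouping_id(), sum(age), avg(height)
FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height)
GROUP BY CUBE(name, height);
```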
