From 191c7016924a04d2460b88a032b780702e8044af Mon Sep 17 00:00:00 2001 From: Wilson Wu Date: Sat, 26 Dec 2015 21:21:43 -0800 Subject: [PATCH] [SPARK-12460][SQL]Add ExpressionDescription to aggregate functions --- .../spark/sql/catalyst/expressions/aggregate/Average.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Corr.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Count.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/First.scala | 2 ++ .../expressions/aggregate/HyperLogLogPlusPlus.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Kurtosis.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Last.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Max.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Min.scala | 3 ++- .../spark/sql/catalyst/expressions/aggregate/Skewness.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Stddev.scala | 6 ++++++ .../spark/sql/catalyst/expressions/aggregate/Sum.scala | 2 ++ .../spark/sql/catalyst/expressions/aggregate/Variance.scala | 4 ++++ 13 files changed, 32 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 94ac4bf09b90b..f1ad1fe7d03d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the average of the values in a group.") case class Average(child: Expression) extends DeclarativeAggregate { override def prettyName: String = "avg" diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala index d25f3335ffd93..c25aa0740494d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala @@ -29,6 +29,8 @@ import org.apache.spark.sql.types._ * Definition of Pearson correlation can be found at * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient */ +@ExpressionDescription( + usage = "_FUNC_(column1, column2) - Aggregate function: returns the Pearson Correlation Coefficient for two columns.") case class Corr( left: Expression, right: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index 663c69e799fbd..e688f0c3d67b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -21,6 +21,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the number of items in a group.") case class Count(children: Seq[Expression]) extends DeclarativeAggregate { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala index 35f57426feaf2..be66828207c95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._ * is used) its result will not be deterministic (unless the input table is sorted and has * a single partition, and we use a single reducer to do the aggregation.). */ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the first value in a group.") case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { def this(child: Expression) = this(child, Literal.create(false, BooleanType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala index e1fd22e36764e..6e98b75889bc1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala @@ -48,6 +48,8 @@ import org.apache.spark.sql.types._ * @param relativeSD the maximum estimation error allowed. 
*/ // scalastyle:on +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the approximate number of distinct items in a group.") case class HyperLogLogPlusPlus( child: Expression, relativeSD: Double = 0.05, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Kurtosis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Kurtosis.scala index c2bf2cb94116c..300a5dd46008a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Kurtosis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Kurtosis.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the kurtosis of the values in a group.") case class Kurtosis(child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala index be7e12d7a2336..696360a216658 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.types._ * is used) its result will not be deterministic (unless the input table is sorted and has * a single partition, and we use a single reducer to do the aggregation.). 
*/ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the last value in a group.") case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate { def this(child: Expression) = this(child, Literal.create(false, BooleanType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala index 906003188d4ff..ea9880006b668 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the maximum value of the expression in a group.") case class Max(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala index 39f7afbd081cd..a73c32a1580a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala @@ -22,7 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ - +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the minimum value of the expression in a group.") case class Min(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Skewness.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Skewness.scala index 9411bcea2539a..a1d64d660c06c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Skewness.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Skewness.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the skewness of the values in a group.") case class Skewness(child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Stddev.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Stddev.scala index eec79a9033e36..bd19bbd8a546a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Stddev.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Stddev.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the sample standard deviation of " + + "the expression in a group.") case class StddevSamp(child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) @@ -50,6 +53,9 @@ case class StddevSamp(child: Expression, } } +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the population standard deviation of " + + "the expression in a group.") case class StddevPop( child: Expression, mutableAggBufferOffset: Int = 0, diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index 08a67ea3df51d..57e4872c26a04 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the sum of all values in the expression.") case class Sum(child: Expression) extends DeclarativeAggregate { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Variance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Variance.scala index cf3a740305391..ab86fb1b78fed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Variance.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Variance.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions._ +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the unbiased variance of the values in a group.") case class VarianceSamp(child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) @@ -50,6 +52,8 @@ case class VarianceSamp(child: Expression, } } +@ExpressionDescription( + usage = "_FUNC_(column) - Aggregate function: returns the population variance of the values in a group.") case class VariancePop( child: Expression, mutableAggBufferOffset: Int = 0,