Skip to content

Commit

Permalink
[SPARK-12480][FOLLOW-UP] use a single column vararg for hash
Browse files Browse the repository at this point in the history
address comments in #10435

This makes the API easier to use when users programmatically generate the call to hash, and they will get an analysis exception if the arguments of hash are empty.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #10588 from cloud-fan/hash.
  • Loading branch information
cloud-fan authored and rxin committed Jan 5, 2016
1 parent 9a6ba7e commit 7676833
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 3 deletions.
12 changes: 12 additions & 0 deletions python/pyspark/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,18 @@ def sha2(col, numBits):
return Column(jc)


@since(2.0)
def hash(*cols):
    """Computes a hash code over the given columns and returns it as an int column.

    :param cols: the columns to hash, passed as varargs. The doctest below passes a
        column name; each entry is converted with ``_to_java_column``.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
    [Row(hash=1358996357)]
    """
    ctx = SparkContext._active_spark_context
    # Resolve the JVM-side function first, then hand it the varargs packed as one JVM Seq.
    jvm_hash = ctx._jvm.functions.hash
    java_columns = _to_seq(ctx, cols, _to_java_column)
    return Column(jvm_hash(java_columns))


# ---------------------- String/Binary functions ------------------------------

_string_functions = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ case class Murmur3Hash(children: Seq[Expression], seed: Int) extends Expression

override def checkInputDataTypes(): TypeCheckResult = {
if (children.isEmpty) {
TypeCheckResult.TypeCheckFailure("arguments of function hash cannot be empty")
TypeCheckResult.TypeCheckFailure("function hash requires at least one argument")
} else {
TypeCheckResult.TypeCheckSuccess
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
assertError(Coalesce(Seq('intField, 'booleanField)),
"input to function coalesce should all be the same type")
assertError(Coalesce(Nil), "input to function coalesce cannot be empty")
assertError(new Murmur3Hash(Nil), "function hash requires at least one argument")
assertError(Explode('intField),
"input to function explode should be array or map type")
}
Expand Down
4 changes: 2 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1820,8 +1820,8 @@ object functions extends LegacyFunctions {
* @since 2.0
*/
@scala.annotation.varargs
def hash(col: Column, cols: Column*): Column = withExpr {
new Murmur3Hash((col +: cols).map(_.expr))
def hash(cols: Column*): Column = withExpr {
new Murmur3Hash(cols.map(_.expr))
}

//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 7676833

Please sign in to comment.