From c1661675df87163c49d6fc3692a984e93cd12eab Mon Sep 17 00:00:00 2001 From: Shilei Date: Fri, 12 Jun 2015 14:20:17 +0800 Subject: [PATCH 1/9] Add md5 function --- .../catalyst/analysis/FunctionRegistry.scala | 3 ++ .../spark/sql/catalyst/expressions/misc.scala | 52 +++++++++++++++++++ .../expressions/MiscFunctionsSuite.scala | 32 ++++++++++++ .../org/apache/spark/sql/functions.scala | 19 +++++++ .../spark/sql/DataFrameFunctionsSuite.scala | 11 ++++ 5 files changed, 117 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 04e306da23e4c..4d20576b8539f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -130,6 +130,9 @@ object FunctionRegistry { expression[ToDegrees]("degrees"), expression[ToRadians]("radians"), + // misc functions + expression[Md5]("md5"), + // aggregate functions expression[Average]("avg"), expression[Count]("count"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala new file mode 100644 index 0000000000000..fbdceb72c1ae7 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.security.MessageDigest + +import org.apache.commons.codec.digest.DigestUtils +import org.apache.spark.sql.types.{BinaryType, StringType, DataType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * A function that calculates an MD5 128-bit checksum for the string or binary. + * Defined for String and Binary types. + */ +case class Md5(child: Expression) + extends UnaryExpression with ExpectsInputTypes { + + override def dataType: DataType = StringType + + override def expectedChildTypes: Seq[DataType] = + if (child.dataType == BinaryType) Seq(BinaryType) else Seq(StringType) + + override def children: Seq[Expression] = child :: Nil + + override def eval(input: Row): Any = { + val value = child.eval(input) + if (value == null) { + null + } else if (child.dataType == BinaryType) { + UTF8String.fromString(DigestUtils.md5Hex(value.asInstanceOf[Array[Byte]])) + } else { + UTF8String.fromString(DigestUtils.md5Hex(value.asInstanceOf[UTF8String].getBytes)) + } + } + + override def toString: String = s"md5($child)" +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala new file mode 100644 index 0000000000000..f3dec71c5b756 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.dsl.expressions._ + +class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + test("md5") { + val s1 = 'a.string.at(0) + val s2 = 'a.binary.at(0) + checkEvaluation(Md5(s1), "902fbdd2b1df0c4f70b4a5d23525e932", create_row("ABC")) + checkEvaluation(Md5(s2), "6ac1e56bc78f031059be7be854522c4c", create_row(Array[Byte](1,2,3,4,5,6))) + } + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c5b77724aae17..ab37576415a41 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -36,6 +36,7 @@ import org.apache.spark.util.Utils * @groupname sort_funcs Sorting functions * @groupname normal_funcs Non-aggregate functions * @groupname math_funcs Math functions + * @groupname misc_funcs Misc functions * @groupname window_funcs Window functions * @groupname string_funcs String functions * @groupname Ungrouped Support functions for DataFrames. @@ -1334,6 +1335,24 @@ object functions { */ def toRadians(columnName: String): Column = toRadians(Column(columnName)) + ////////////////////////////////////////////////////////////////////////////////////////////// + // Misc functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Calculates an MD5 128-bit checksum for the string or binary + * @group misc_funcs + * @since 1.5.0 + */ + def md5(e: Column): Column = Md5(e.expr) + + /** + * Calculates an MD5 128-bit checksum for the string or binary + * @group misc_funcs + * @since 1.5.0 + */ + def md5(columnName: String): Column = md5(Column(columnName)) + ////////////////////////////////////////////////////////////////////////////////////////////// // String functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index cfd23867a9bba..6d7750248aa73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -123,6 +123,17 @@ class DataFrameFunctionsSuite extends QueryTest { Row("x", "y", null)) } + test("misc md5 function") { + val df = Seq(("ABC", Array[Byte](1, 2, 3, 4, 5, 6))).toDF("a", "b") + checkAnswer( + df.select(md5($"a"), md5("b")), + Row("902fbdd2b1df0c4f70b4a5d23525e932", "6ac1e56bc78f031059be7be854522c4c")) + + checkAnswer( + df.selectExpr("md5(a)", "md5(b)"), + Row("902fbdd2b1df0c4f70b4a5d23525e932", "6ac1e56bc78f031059be7be854522c4c")) + } + test("string length function") { checkAnswer( nullStrings.select(strlen($"s"), strlen("s")), From b8c73b4b339a14e3c96980680aa0c91febe803c4 Mon Sep 17 00:00:00 2001 From: Shilei Date: Fri, 12 Jun 2015 15:22:20 +0800 Subject: [PATCH 2/9] Rewrite the type check for Md5 --- .../spark/sql/catalyst/expressions/misc.scala | 16 +++++++++++----- .../expressions/MiscFunctionsSuite.scala | 3 ++- .../scala/org/apache/spark/sql/functions.scala | 4 ++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index fbdceb72c1ae7..c3eac1bc9731c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.security.MessageDigest import org.apache.commons.codec.digest.DigestUtils +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.types.{BinaryType, StringType, DataType} import org.apache.spark.unsafe.types.UTF8String @@ -27,13 +28,18 @@ import org.apache.spark.unsafe.types.UTF8String * A function that calculates an MD5 128-bit checksum for the string or binary. * Defined for String and Binary types. */ -case class Md5(child: Expression) - extends UnaryExpression with ExpectsInputTypes { +case class Md5(child: Expression) extends UnaryExpression { override def dataType: DataType = StringType - override def expectedChildTypes: Seq[DataType] = - if (child.dataType == BinaryType) Seq(BinaryType) else Seq(StringType) + override def checkInputDataTypes(): TypeCheckResult = + if (child.dataType == StringType || child.dataType == BinaryType) { + TypeCheckResult.TypeCheckSuccess + } else { + TypeCheckResult.TypeCheckFailure( + s"types error in ${this.getClass.getSimpleName} " + + s"get (${child.dataType}, expect StringType or BinaryType).") + } override def children: Seq[Expression] = child :: Nil @@ -48,5 +54,5 @@ case class Md5(child: Expression) } } - override def toString: String = s"md5($child)" + override def toString: String = s"MD5($child)" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala index f3dec71c5b756..6a9fa1264339e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -26,7 +26,8 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { val s1 = 'a.string.at(0) val s2 = 'a.binary.at(0) checkEvaluation(Md5(s1), "902fbdd2b1df0c4f70b4a5d23525e932", create_row("ABC")) - checkEvaluation(Md5(s2), "6ac1e56bc78f031059be7be854522c4c", create_row(Array[Byte](1,2,3,4,5,6))) + checkEvaluation(Md5(s2), "6ac1e56bc78f031059be7be854522c4c", + create_row(Array[Byte](1, 2, 3, 4, 5, 6))) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ab37576415a41..21b41a0c33f2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1340,14 +1340,14 @@ object functions { ////////////////////////////////////////////////////////////////////////////////////////////// /** - * Calculates an MD5 128-bit checksum for the string or binary + * Calculates the MD5 digest and returns the value as a 32 character hex string. * @group misc_funcs * @since 1.5.0 */ def md5(e: Column): Column = Md5(e.expr) /** - * Calculates an MD5 128-bit checksum for the string or binary + * Calculates the MD5 digest and returns the value as a 32 character hex string. * @group misc_funcs * @since 1.5.0 */ From 60ccde1a59861e07bab7e0558bf7c93df963f4fe Mon Sep 17 00:00:00 2001 From: Shilei Date: Mon, 15 Jun 2015 10:43:51 +0800 Subject: [PATCH 3/9] Add more test case --- .../apache/spark/sql/catalyst/expressions/misc.scala | 6 +++--- .../sql/catalyst/expressions/MiscFunctionsSuite.scala | 11 +++++------ .../main/scala/org/apache/spark/sql/functions.scala | 2 ++ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index c3eac1bc9731c..7c6639498fa1e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -25,8 +25,8 @@ import org.apache.spark.sql.types.{BinaryType, StringType, DataType} import org.apache.spark.unsafe.types.UTF8String /** - * A function that calculates an MD5 128-bit checksum for the string or binary. - * Defined for String and Binary types. + * A function that calculates an MD5 128-bit checksum and returns it as a hex string + * For input of type [[StringType]] or [[BinaryType]] */ case class Md5(child: Expression) extends UnaryExpression { @@ -43,7 +43,7 @@ case class Md5(child: Expression) extends UnaryExpression { override def children: Seq[Expression] = child :: Nil - override def eval(input: Row): Any = { + override def eval(input: InternalRow): Any = { val value = child.eval(input) if (value == null) { null diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala index 6a9fa1264339e..1179a59bae323 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -18,16 +18,15 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.types.{StringType, BinaryType} class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("md5") { - val s1 = 'a.string.at(0) - val s2 = 'a.binary.at(0) - checkEvaluation(Md5(s1), "902fbdd2b1df0c4f70b4a5d23525e932", create_row("ABC")) - checkEvaluation(Md5(s2), "6ac1e56bc78f031059be7be854522c4c", - create_row(Array[Byte](1, 2, 3, 4, 5, 6))) + checkEvaluation(Md5(Literal("ABC")), "902fbdd2b1df0c4f70b4a5d23525e932") + checkEvaluation(Md5(Literal.create(Array[Byte](1, 2, 3, 4, 5, 6), BinaryType)), "6ac1e56bc78f031059be7be854522c4c") + checkEvaluation(Md5(Literal.create(null, BinaryType)), null) + checkEvaluation(Md5(Literal.create(null, StringType)), null) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 21b41a0c33f2c..0797bc48958a0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1341,6 +1341,7 @@ object functions { /** * Calculates the MD5 digest and returns the value as a 32 character hex string. + * * @group misc_funcs * @since 1.5.0 */ @@ -1348,6 +1349,7 @@ object functions { /** * Calculates the MD5 digest and returns the value as a 32 character hex string. + * * @group misc_funcs * @since 1.5.0 */ From 1df0b5bb12cfe69435fe12ae514648b22733b2eb Mon Sep 17 00:00:00 2001 From: Shilei Date: Mon, 15 Jun 2015 12:47:09 +0800 Subject: [PATCH 4/9] format to scala type --- .../spark/sql/catalyst/expressions/MiscFunctionsSuite.scala | 3 ++- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala index 1179a59bae323..1f5f69dfef6c1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -24,7 +24,8 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("md5") { checkEvaluation(Md5(Literal("ABC")), "902fbdd2b1df0c4f70b4a5d23525e932") - checkEvaluation(Md5(Literal.create(Array[Byte](1, 2, 3, 4, 5, 6), BinaryType)), "6ac1e56bc78f031059be7be854522c4c") + checkEvaluation(Md5(Literal.create(Array[Byte](1, 2, 3, 4, 5, 6), BinaryType)), + "6ac1e56bc78f031059be7be854522c4c") checkEvaluation(Md5(Literal.create(null, BinaryType)), null) checkEvaluation(Md5(Literal.create(null, StringType)), null) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 0797bc48958a0..c3fcb6818e4fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1349,7 +1349,7 @@ object functions { /** * Calculates the MD5 digest and returns the value as a 32 character hex string. - * + * * @group misc_funcs * @since 1.5.0 */ From 12c61f441b1aa40a86a6cd4a62a9103542e6e71f Mon Sep 17 00:00:00 2001 From: Shilei Date: Mon, 15 Jun 2015 16:00:52 +0800 Subject: [PATCH 5/9] Accept only BinaryType for Md5 --- .../spark/sql/catalyst/expressions/misc.scala | 15 ++++++++------- .../catalyst/expressions/MiscFunctionsSuite.scala | 3 +-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 7c6639498fa1e..1c9ddf38ee533 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -26,19 +26,22 @@ import org.apache.spark.unsafe.types.UTF8String /** * A function that calculates an MD5 128-bit checksum and returns it as a hex string - * For input of type [[StringType]] or [[BinaryType]] + * For input of type [[BinaryType]] */ -case class Md5(child: Expression) extends UnaryExpression { +case class Md5(child: Expression) + extends UnaryExpression with ExpectsInputTypes { override def dataType: DataType = StringType + override def expectedChildTypes: Seq[DataType] = Seq(BinaryType) + override def checkInputDataTypes(): TypeCheckResult = - if (child.dataType == StringType || child.dataType == BinaryType) { + if (child.dataType == BinaryType) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( s"types error in ${this.getClass.getSimpleName} " + - s"get (${child.dataType}, expect StringType or BinaryType).") + s"get (${child.dataType}, expect BinaryType).") } override def children: Seq[Expression] = child :: Nil @@ -47,10 +50,8 @@ case class Md5(child: Expression) extends UnaryExpression { val value = child.eval(input) if (value == null) { null - } else if (child.dataType == BinaryType) { + } else{ UTF8String.fromString(DigestUtils.md5Hex(value.asInstanceOf[Array[Byte]])) - } else { - UTF8String.fromString(DigestUtils.md5Hex(value.asInstanceOf[UTF8String].getBytes)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala index 1f5f69dfef6c1..48b84130b4556 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala @@ -23,11 +23,10 @@ import org.apache.spark.sql.types.{StringType, BinaryType} class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("md5") { - checkEvaluation(Md5(Literal("ABC")), "902fbdd2b1df0c4f70b4a5d23525e932") + checkEvaluation(Md5(Literal("ABC".getBytes)), "902fbdd2b1df0c4f70b4a5d23525e932") checkEvaluation(Md5(Literal.create(Array[Byte](1, 2, 3, 4, 5, 6), BinaryType)), "6ac1e56bc78f031059be7be854522c4c") checkEvaluation(Md5(Literal.create(null, BinaryType)), null) - checkEvaluation(Md5(Literal.create(null, StringType)), null) } } From 9509ad0e3090af7f4540f821cfe6d00dddc3032c Mon Sep 17 00:00:00 2001 From: Shilei Date: Tue, 16 Jun 2015 09:45:24 +0800 Subject: [PATCH 6/9] Format code --- .../scala/org/apache/spark/sql/catalyst/expressions/misc.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 1c9ddf38ee533..f65ddce88ea40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -50,7 +50,7 @@ case class Md5(child: Expression) val value = child.eval(input) if (value == null) { null - } else{ + } else { UTF8String.fromString(DigestUtils.md5Hex(value.asInstanceOf[Array[Byte]])) } } From da60eb313abd5e19b006f17cca654c66c14e6c4a Mon Sep 17 00:00:00 2001 From: Shilei Date: Wed, 17 Jun 2015 10:12:21 +0800 Subject: [PATCH 7/9] Remove checkInputDataTypes function --- .../spark/sql/catalyst/expressions/misc.scala | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index f65ddce88ea40..823d3708d4db2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -17,10 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.security.MessageDigest - import org.apache.commons.codec.digest.DigestUtils -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.types.{BinaryType, StringType, DataType} import org.apache.spark.unsafe.types.UTF8String @@ -35,17 +32,6 @@ case class Md5(child: Expression) override def expectedChildTypes: Seq[DataType] = Seq(BinaryType) - override def checkInputDataTypes(): TypeCheckResult = - if (child.dataType == BinaryType) { - TypeCheckResult.TypeCheckSuccess - } else { - TypeCheckResult.TypeCheckFailure( - s"types error in ${this.getClass.getSimpleName} " + - s"get (${child.dataType}, expect BinaryType).") - } - - override def children: Seq[Expression] = child :: Nil - override def eval(input: InternalRow): Any = { val value = child.eval(input) if (value == null) { From 04bd27b04b6010459c9f1a558d5a0b1ac3965ac1 Mon Sep 17 00:00:00 2001 From: Shilei Date: Fri, 19 Jun 2015 09:37:03 +0800 Subject: [PATCH 8/9] Add codegen --- .../org/apache/spark/sql/catalyst/expressions/misc.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 823d3708d4db2..4f427add45871 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.commons.codec.digest.DigestUtils +import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.types.{BinaryType, StringType, DataType} import org.apache.spark.unsafe.types.UTF8String @@ -41,5 +42,9 @@ case class Md5(child: Expression) } } - override def toString: String = s"MD5($child)" + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + defineCodeGen(ctx, ev, c => + s"""org.apache.spark.unsafe.types.UTF8String.fromString + |(org.apache.commons.codec.digest.DigestUtils.md5Hex($c))""".stripMargin) + } } From 11fcdb2a4c8c947d84e7a00d840b1a0896c91c55 Mon Sep 17 00:00:00 2001 From: Shilei Date: Fri, 19 Jun 2015 13:35:45 +0800 Subject: [PATCH 9/9] Fix the indent --- .../org/apache/spark/sql/catalyst/expressions/misc.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 4f427add45871..4bee8cb728e5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -44,7 +44,7 @@ case class Md5(child: Expression) override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { defineCodeGen(ctx, ev, c => - s"""org.apache.spark.unsafe.types.UTF8String.fromString - |(org.apache.commons.codec.digest.DigestUtils.md5Hex($c))""".stripMargin) + "org.apache.spark.unsafe.types.UTF8String.fromString" + + s"(org.apache.commons.codec.digest.DigestUtils.md5Hex($c))") } }