From 2dd04f41687edb95d9e1b446cafa4e5e09460cbd Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Sun, 3 Jul 2016 15:45:38 +0530 Subject: [PATCH 01/31] [SPARK-16287][SQL] Implement str_to_map SQL function --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/complexTypeCreator.scala | 72 ++++++++++++++++++- .../expressions/ComplexTypeSuite.scala | 13 ++++ .../org/apache/spark/sql/functions.scala | 18 +++++ .../spark/sql/DataFrameFunctionsSuite.scala | 35 +++++++++ .../spark/sql/hive/HiveSessionCatalog.scala | 2 +- 6 files changed, 139 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e7f335f4fb4ed..032b9658fe919 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -226,6 +226,7 @@ object FunctionRegistry { expression[Signum]("signum"), expression[Sin]("sin"), expression[Sinh]("sinh"), + expression[StringToMap]("str_to_map"), expression[Sqrt]("sqrt"), expression[Tan]("tan"), expression[Tanh]("tanh"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index d603d3c73ecbc..8092465908890 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, TypeUtils} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData, TypeUtils} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -393,3 +393,73 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression override def prettyName: String = "named_struct_unsafe" } + +/** + * Creates a map after splitting the input text into key/value pairs using delimeters + */ +@ExpressionDescription( + usage = """_FUNC_(text[, delimiter1, delimiter2]) - Creates a map after splitting the text into + key/value pairs using delimeters. + Default delimiters are ',' for delimiter1 and '=' for delimiter2.""") +case class StringToMap(child: Expression, delimiter1: Expression, delimiter2: Expression) + extends TernaryExpression with ExpectsInputTypes { + + def this(child: Expression) = { + this(child, Literal(","), Literal("=")) + } + + override def children: Seq[Expression] = Seq(child, delimiter1, delimiter2) + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) + + override def dataType: DataType = MapType(StringType, StringType, valueContainsNull = false) + + override def foldable: Boolean = child.foldable + + override def nullSafeEval(str: Any, delim1: Any, delim2: Any): Any = { + val array = str.asInstanceOf[UTF8String] + .split(delim1.asInstanceOf[UTF8String], -1) + .map{_.split(delim2.asInstanceOf[UTF8String], 2)} + + ArrayBasedMapData(array.map(_(0)), array.map(_(1))).asInstanceOf[MapData] + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + + nullSafeCodeGen(ctx, ev, (text, delim1, delim2) => { + val arrayClass = classOf[GenericArrayData].getName + val mapClass = classOf[ArrayBasedMapData].getName + val keyArray = ctx.freshName("keyArray") + val valueArray = ctx.freshName("valueArray") + ctx.addMutableState("UTF8String[]", keyArray, s"this.$keyArray = null;") + ctx.addMutableState("UTF8String[]", valueArray, s"this.$valueArray = null;") + + val keyData = s"new $arrayClass($keyArray)" + val valueData = s"new $arrayClass($valueArray)" + + val tempArray = ctx.freshName("tempArray") + val keyValue = ctx.freshName("keyValue") + val i = ctx.freshName("i") + + s""" + UTF8String[] $tempArray = ($text).split(UTF8String.fromString("$delim1"), -1); + + $keyArray = new UTF8String[$tempArray.length]; + $valueArray = new UTF8String[$tempArray.length]; + + for (int $i = 0; $i < $tempArray.length; $i ++) { + UTF8String[] $keyValue = + ($tempArray[$i]).split(UTF8String.fromString("$delim2"), 2); + $keyArray[$i] = $keyValue[0]; + $valueArray[$i] = $keyValue[1]; + } + + ${ev.value} = new $mapClass($keyData, $valueData); + this.$keyArray = null; + this.$valueArray = null; + """ + }) + } + + override def prettyName: String = "str_to_map" +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index ec7be4d4b849d..6133824404f2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -246,4 +246,17 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { checkMetadata(CreateStructUnsafe(Seq(a, b))) checkMetadata(CreateNamedStructUnsafe(Seq("a", a, "b", b))) } + + test("StringToMap") { + val s0 = Literal("a:1,b:2,c:3") + val m0 = Map("a" -> "1", "b" -> "2", "c" -> "3") + val s1 = Literal("a: ,b:2") + val m1 = Map("a" -> " ", "b" -> "2") + val s2 = Literal("a=1,b=2,c=3") + val m2 = Map("a" -> "1", "b" -> "2", "c" -> "3") + + checkEvaluation(StringToMap(s0, Literal(","), Literal(":")), m0) + checkEvaluation(StringToMap(s1, Literal(","), Literal(":")), m1) + checkEvaluation(new StringToMap(s2), m2) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c8782df146df6..15035ab6c6306 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2470,6 +2470,24 @@ object functions { */ def second(e: Column): Column = withExpr { Second(e.expr) } + /** + * Extracts key, values from input string using ',' as delimiter1 and '=' as delimiter2 + * and creates a new map column. + * @group normal_funcs + * @since 2.1.0 + */ + def str_to_map(e: Column): Column = withExpr { new StringToMap(e.expr) } + + /** + * Extracts key, values from input string using delimiter1 and delimiter2 + * and creates a new map column. + * @group normal_funcs + * @since 2.1.0 + */ + def str_to_map(e: Column, delimiter1: String, delimiter2: String): Column = withExpr { + StringToMap(e.expr, Literal(delimiter1), Literal(delimiter2)) + } + /** * Extracts the week number as an integer from a given date/timestamp/string. * @group datetime_funcs diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 0f6c49e759590..0d7a2286df453 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -404,4 +404,39 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(true), Row(true)) ) } + + test("str_to_map function") { + val df1 = Seq( + ("a=1,b=2", "y"), + ("a=1,b=2,c=3", "y") + ).toDF("a", "b") + + checkAnswer( + df1.selectExpr("str_to_map(a)"), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) + ) + ) + + checkAnswer( + df1.select(str_to_map($"a")), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) + ) + ) + + val df2 = Seq(("a:1,b:2,c:3", "y")).toDF("a", "b") + + checkAnswer( + df2.selectExpr("str_to_map(a,',',':')"), + Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) + ) + + checkAnswer( + df2.select(str_to_map($"a", ",", ":")), + Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) + ) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 53990b8e3b38f..6d5503d6cfcab 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -239,7 +239,7 @@ private[sql] class HiveSessionCatalog( // str_to_map, windowingtablefunction. private val hiveFunctions = Seq( "hash", "java_method", "histogram_numeric", - "parse_url", "percentile", "percentile_approx", "reflect", "sentences", "stack", "str_to_map", + "parse_url", "percentile", "percentile_approx", "reflect", "sentences", "stack", "xpath", "xpath_double", "xpath_float", "xpath_int", "xpath_long", "xpath_number", "xpath_short", "xpath_string", From fa294bc04896ed61efb847adc2612feca9cbaf16 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Sun, 3 Jul 2016 15:50:54 +0530 Subject: [PATCH 02/31] fix codeGen --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 8092465908890..488a34d74ad78 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -442,14 +442,14 @@ case class StringToMap(child: Expression, delimiter1: Expression, delimiter2: Ex val i = ctx.freshName("i") s""" - UTF8String[] $tempArray = ($text).split(UTF8String.fromString("$delim1"), -1); + UTF8String[] $tempArray = ($text).split($delim1, -1); $keyArray = new UTF8String[$tempArray.length]; $valueArray = new UTF8String[$tempArray.length]; for (int $i = 0; $i < $tempArray.length; $i ++) { UTF8String[] $keyValue = - ($tempArray[$i]).split(UTF8String.fromString("$delim2"), 2); + ($tempArray[$i]).split($delim2, 2); $keyArray[$i] = $keyValue[0]; $valueArray[$i] = $keyValue[1]; } From 6b2390d2dc141553eef3df9dd34744b9d08ccfdb Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Sun, 3 Jul 2016 19:47:59 +0530 Subject: [PATCH 03/31] Address comments --- .../expressions/complexTypeCreator.scala | 10 +++++----- .../scala/org/apache/spark/sql/functions.scala | 18 ------------------ .../spark/sql/DataFrameFunctionsSuite.scala | 13 ------------- 3 files changed, 5 insertions(+), 36 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 488a34d74ad78..e9b57d39b337d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -398,17 +398,17 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression * Creates a map after splitting the input text into key/value pairs using delimeters */ @ExpressionDescription( - usage = """_FUNC_(text[, delimiter1, delimiter2]) - Creates a map after splitting the text into - key/value pairs using delimeters. - Default delimiters are ',' for delimiter1 and '=' for delimiter2.""") -case class StringToMap(child: Expression, delimiter1: Expression, delimiter2: Expression) + usage = """_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text into + key/value pairs using delimiters. + Default delimiters are ',' for pairDelim and '=' for keyValueDelim.""") +case class StringToMap(child: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes { def this(child: Expression) = { this(child, Literal(","), Literal("=")) } - override def children: Seq[Expression] = Seq(child, delimiter1, delimiter2) + override def children: Seq[Expression] = Seq(child, pairDelim, keyValueDelim) override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 15035ab6c6306..c8782df146df6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2470,24 +2470,6 @@ object functions { */ def second(e: Column): Column = withExpr { Second(e.expr) } - /** - * Extracts key, values from input string using ',' as delimiter1 and '=' as delimiter2 - * and creates a new map column. - * @group normal_funcs - * @since 2.1.0 - */ - def str_to_map(e: Column): Column = withExpr { new StringToMap(e.expr) } - - /** - * Extracts key, values from input string using delimiter1 and delimiter2 - * and creates a new map column. - * @group normal_funcs - * @since 2.1.0 - */ - def str_to_map(e: Column, delimiter1: String, delimiter2: String): Column = withExpr { - StringToMap(e.expr, Literal(delimiter1), Literal(delimiter2)) - } - /** * Extracts the week number as an integer from a given date/timestamp/string. * @group datetime_funcs diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 0d7a2286df453..13f899833c90c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -419,24 +419,11 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { ) ) - checkAnswer( - df1.select(str_to_map($"a")), - Seq( - Row(Map("a" -> "1", "b" -> "2")), - Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) - ) - ) - val df2 = Seq(("a:1,b:2,c:3", "y")).toDF("a", "b") checkAnswer( df2.selectExpr("str_to_map(a,',',':')"), Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) ) - - checkAnswer( - df2.select(str_to_map($"a", ",", ":")), - Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) - ) } } From ca74f9a4227d26618a4448e8c92ccadd20d97dcc Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 5 Jul 2016 23:08:39 +0530 Subject: [PATCH 04/31] address comments --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index e9b57d39b337d..268968438b5e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -401,21 +401,19 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression usage = """_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text into key/value pairs using delimiters. Default delimiters are ',' for pairDelim and '=' for keyValueDelim.""") -case class StringToMap(child: Expression, pairDelim: Expression, keyValueDelim: Expression) +case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes { def this(child: Expression) = { this(child, Literal(","), Literal("=")) } - override def children: Seq[Expression] = Seq(child, pairDelim, keyValueDelim) + override def children: Seq[Expression] = Seq(text, pairDelim, keyValueDelim) override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) override def dataType: DataType = MapType(StringType, StringType, valueContainsNull = false) - override def foldable: Boolean = child.foldable - override def nullSafeEval(str: Any, delim1: Any, delim2: Any): Any = { val array = str.asInstanceOf[UTF8String] .split(delim1.asInstanceOf[UTF8String], -1) From f7c03c5a44011949e6c8b444334560c0b61c766e Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 6 Jul 2016 08:50:10 +0530 Subject: [PATCH 05/31] address comments --- .../sql/catalyst/expressions/complexTypeCreator.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 268968438b5e7..a4302f48f8bd1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -419,12 +419,12 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E .split(delim1.asInstanceOf[UTF8String], -1) .map{_.split(delim2.asInstanceOf[UTF8String], 2)} - ArrayBasedMapData(array.map(_(0)), array.map(_(1))).asInstanceOf[MapData] + ArrayBasedMapData(array.map(_(0)), array.map(_(1))) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (text, delim1, delim2) => { + nullSafeCodeGen(ctx, ev, (text, pairDelim, keyValueDelim) => { val arrayClass = classOf[GenericArrayData].getName val mapClass = classOf[ArrayBasedMapData].getName val keyArray = ctx.freshName("keyArray") @@ -440,14 +440,14 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E val i = ctx.freshName("i") s""" - UTF8String[] $tempArray = ($text).split($delim1, -1); + UTF8String[] $tempArray = ($text).split($pairDelim, -1); $keyArray = new UTF8String[$tempArray.length]; $valueArray = new UTF8String[$tempArray.length]; for (int $i = 0; $i < $tempArray.length; $i ++) { UTF8String[] $keyValue = - ($tempArray[$i]).split($delim2, 2); + ($tempArray[$i]).split($keyValueDelim, 2); $keyArray[$i] = $keyValue[0]; $valueArray[$i] = $keyValue[1]; } From d1573b6c3d4585f8258531571eacf12d4fad1dbc Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 6 Jul 2016 10:38:26 +0530 Subject: [PATCH 06/31] remove ctx.addMutableState --- .../expressions/complexTypeCreator.scala | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index a4302f48f8bd1..28717b967d55e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -427,14 +427,9 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E nullSafeCodeGen(ctx, ev, (text, pairDelim, keyValueDelim) => { val arrayClass = classOf[GenericArrayData].getName val mapClass = classOf[ArrayBasedMapData].getName + val keyArray = ctx.freshName("keyArray") val valueArray = ctx.freshName("valueArray") - ctx.addMutableState("UTF8String[]", keyArray, s"this.$keyArray = null;") - ctx.addMutableState("UTF8String[]", valueArray, s"this.$valueArray = null;") - - val keyData = s"new $arrayClass($keyArray)" - val valueData = s"new $arrayClass($valueArray)" - val tempArray = ctx.freshName("tempArray") val keyValue = ctx.freshName("keyValue") val i = ctx.freshName("i") @@ -442,19 +437,16 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E s""" UTF8String[] $tempArray = ($text).split($pairDelim, -1); - $keyArray = new UTF8String[$tempArray.length]; - $valueArray = new UTF8String[$tempArray.length]; + UTF8String[] $keyArray = new UTF8String[$tempArray.length]; + UTF8String[] $valueArray = new UTF8String[$tempArray.length]; for (int $i = 0; $i < $tempArray.length; $i ++) { - UTF8String[] $keyValue = - ($tempArray[$i]).split($keyValueDelim, 2); + UTF8String[] $keyValue = ($tempArray[$i]).split($keyValueDelim, 2); $keyArray[$i] = $keyValue[0]; $valueArray[$i] = $keyValue[1]; } - ${ev.value} = new $mapClass($keyData, $valueData); - this.$keyArray = null; - this.$valueArray = null; + ${ev.value} = new $mapClass(new $arrayClass($keyArray), new $arrayClass($valueArray)); """ }) } From 94c18ff013fbd610832dab187d1ba63408251e3d Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 6 Jul 2016 10:52:58 +0530 Subject: [PATCH 07/31] added example --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 28717b967d55e..e00402a120b55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -400,7 +400,8 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression @ExpressionDescription( usage = """_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text into key/value pairs using delimiters. - Default delimiters are ',' for pairDelim and '=' for keyValueDelim.""") + Default delimiters are ',' for pairDelim and '=' for keyValueDelim.""", + extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes { From 2725445f9931dca837d2fc628e23dd2a6e18d94b Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 6 Jul 2016 16:21:30 +0530 Subject: [PATCH 08/31] address comments --- .../expressions/complexTypeCreator.scala | 26 +++++++++++++++---- .../expressions/ComplexTypeSuite.scala | 15 ++++++++--- .../spark/sql/DataFrameFunctionsSuite.scala | 4 +-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index e00402a120b55..e118c781c39a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -400,13 +400,17 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression @ExpressionDescription( usage = """_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text into key/value pairs using delimiters. - Default delimiters are ',' for pairDelim and '=' for keyValueDelim.""", + Default delimiters are ',' for pairDelim and ':' for keyValueDelim.""", extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes { + def this(child: Expression, pairDelim: Expression) = { + this(child, pairDelim, Literal(":")) + } + def this(child: Expression) = { - this(child, Literal(","), Literal("=")) + this(child, Literal(","), Literal(":")) } override def children: Seq[Expression] = Seq(text, pairDelim, keyValueDelim) @@ -418,7 +422,14 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def nullSafeEval(str: Any, delim1: Any, delim2: Any): Any = { val array = str.asInstanceOf[UTF8String] .split(delim1.asInstanceOf[UTF8String], -1) - .map{_.split(delim2.asInstanceOf[UTF8String], 2)} + .map { kv => + val arr = kv.split(delim2.asInstanceOf[UTF8String], 2) + if(arr.length < 2) { + Array(arr(0), null) + } else { + arr + } + } ArrayBasedMapData(array.map(_(0)), array.map(_(1))) } @@ -441,10 +452,15 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E UTF8String[] $keyArray = new UTF8String[$tempArray.length]; UTF8String[] $valueArray = new UTF8String[$tempArray.length]; - for (int $i = 0; $i < $tempArray.length; $i ++) { + for (int $i = 0; $i < $tempArray.length; $i++) { UTF8String[] $keyValue = ($tempArray[$i]).split($keyValueDelim, 2); $keyArray[$i] = $keyValue[0]; - $valueArray[$i] = $keyValue[1]; + if ($keyValue.length < 2) { + $valueArray[$i] = null; + } + else { + $valueArray[$i] = $keyValue[1]; + } } ${ev.value} = new $mapClass(new $arrayClass($keyArray), new $arrayClass($valueArray)); diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 6133824404f2d..9afcbd51fd53e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -250,13 +250,22 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { test("StringToMap") { val s0 = Literal("a:1,b:2,c:3") val m0 = Map("a" -> "1", "b" -> "2", "c" -> "3") + checkEvaluation(new StringToMap(s0), m0) + val s1 = Literal("a: ,b:2") val m1 = Map("a" -> " ", "b" -> "2") + checkEvaluation(new StringToMap(s1), m1) + val s2 = Literal("a=1,b=2,c=3") val m2 = Map("a" -> "1", "b" -> "2", "c" -> "3") + checkEvaluation(StringToMap(s2, Literal(","), Literal("=")), m2) + + val s3 = Literal("") + val m3 = Map[String, String]("" -> null) + checkEvaluation(StringToMap(s3, Literal(","), Literal("=")), m3) - checkEvaluation(StringToMap(s0, Literal(","), Literal(":")), m0) - checkEvaluation(StringToMap(s1, Literal(","), Literal(":")), m1) - checkEvaluation(new StringToMap(s2), m2) + val s4 = Literal("a:1_b:2_c:3") + val m4 = Map("a" -> "1", "b" -> "2", "c" -> "3") + checkEvaluation(new StringToMap(s4, Literal("_")), m4) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 13f899833c90c..667660332be03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -412,7 +412,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { ).toDF("a", "b") checkAnswer( - df1.selectExpr("str_to_map(a)"), + df1.selectExpr("str_to_map(a,',','=')"), Seq( Row(Map("a" -> "1", "b" -> "2")), Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) @@ -422,7 +422,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { val df2 = Seq(("a:1,b:2,c:3", "y")).toDF("a", "b") checkAnswer( - df2.selectExpr("str_to_map(a,',',':')"), + df2.selectExpr("str_to_map(a)"), Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) ) } From 9ef8b67ea24f5ce70e21d88cfd97afb04c2a7892 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Thu, 7 Jul 2016 00:18:45 +0530 Subject: [PATCH 09/31] fix style --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index e118c781c39a4..321dd12c5acb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -457,8 +457,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E $keyArray[$i] = $keyValue[0]; if ($keyValue.length < 2) { $valueArray[$i] = null; - } - else { + } else { $valueArray[$i] = $keyValue[1]; } } From 566f372803954f56a3293a9f144191211a762748 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Thu, 7 Jul 2016 00:34:15 +0530 Subject: [PATCH 10/31] fix usage text --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 321dd12c5acb2..f444dbcfee10e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -398,9 +398,9 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression * Creates a map after splitting the input text into key/value pairs using delimeters */ @ExpressionDescription( - usage = """_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text into - key/value pairs using delimiters. - Default delimiters are ',' for pairDelim and ':' for keyValueDelim.""", + usage = "_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text " + + "into key/value pairs using delimiters. " + + "Default delimiters are ',' for pairDelim and ':' for keyValueDelim.", extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes { From 01c4912285c05b6756ac67a496bd36bcfdbf0b14 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Mon, 11 Jul 2016 18:12:31 +0530 Subject: [PATCH 11/31] move tests to String Functions Suite --- .../spark/sql/DataFrameFunctionsSuite.scala | 21 ------------------ .../spark/sql/StringFunctionsSuite.scala | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 667660332be03..844183f26ef2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -405,25 +405,4 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { ) } - test("str_to_map function") { - val df1 = Seq( - ("a=1,b=2", "y"), - ("a=1,b=2,c=3", "y") - ).toDF("a", "b") - - checkAnswer( - df1.selectExpr("str_to_map(a,',','=')"), - Seq( - Row(Map("a" -> "1", "b" -> "2")), - Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) - ) - ) - - val df2 = Seq(("a:1,b:2,c:3", "y")).toDF("a", "b") - - checkAnswer( - df2.selectExpr("str_to_map(a)"), - Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) - ) - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 3edd9884961bc..c36e72d629cab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -349,4 +349,26 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { df2.filter("b>0").selectExpr("format_number(a, b)"), Row("5.0000") :: Row("4.000") :: Row("4.000") :: Row("4.000") :: Row("3.00") :: Nil) } + + test("str_to_map function") { + val df1 = Seq( + ("a=1,b=2", "y"), + ("a=1,b=2,c=3", "y") + ).toDF("a", "b") + + checkAnswer( + df1.selectExpr("str_to_map(a,',','=')"), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("a" -> "1", "b" -> "2", "c" -> "3")) + ) + ) + + val df2 = Seq(("a:1,b:2,c:3", "y")).toDF("a", "b") + + checkAnswer( + df2.selectExpr("str_to_map(a)"), + Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) + ) + } } From 166b1a91fc418c78ff4d822a58d63e31f515f458 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Mon, 11 Jul 2016 18:39:42 +0530 Subject: [PATCH 12/31] Analysis Exception if any argument is null --- .../expressions/complexTypeCreator.scala | 33 ++++++++++++------- .../spark/sql/StringFunctionsSuite.scala | 15 +++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f444dbcfee10e..34b019212e17b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -419,19 +420,29 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def dataType: DataType = MapType(StringType, StringType, valueContainsNull = false) - override def nullSafeEval(str: Any, delim1: Any, delim2: Any): Any = { - val array = str.asInstanceOf[UTF8String] - .split(delim1.asInstanceOf[UTF8String], -1) - .map { kv => - val arr = kv.split(delim2.asInstanceOf[UTF8String], 2) - if(arr.length < 2) { - Array(arr(0), null) - } else { - arr + override def eval(input: InternalRow): Any = { + val exprs = children + val value1 = exprs(0).eval(input) + if (value1 != null) { + val value2 = exprs(1).eval(input) + if (value2 != null) { + val value3 = exprs(2).eval(input) + if (value3 != null) { + val array = value1.asInstanceOf[UTF8String] + .split(value2.asInstanceOf[UTF8String], -1) + .map { kv => + val arr = kv.split(value3.asInstanceOf[UTF8String], 2) + if(arr.length < 2) { + Array(arr(0), null) + } else { + arr + } + } + return ArrayBasedMapData(array.map(_(0)), array.map(_(1))) } } - - ArrayBasedMapData(array.map(_(0)), array.map(_(1))) + } + throw new AnalysisException("All arguments should be a string literal.") } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index d0874cadbda35..d6d9e2797b642 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -405,4 +405,19 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { df2.selectExpr("str_to_map(a)"), Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) ) + + // All arguments should be string literals. + val m1 = intercept[AnalysisException]{ + sql("select str_to_map('a:1,b:2,c:3',null,null)").collect() + }.getMessage + val m2 = intercept[AnalysisException]{ + sql("select str_to_map('a:1,b:2,c:3',null)").collect() + }.getMessage + val m3 = intercept[AnalysisException]{ + sql("select str_to_map(null,null)").collect() + }.getMessage + + assert(m1 == m2 && m2 == m3) + assert(m1.contains("All arguments should be a string literal.")) + } } From 0b4419fd9a80f052ca14ad9594495c74da8cd9a2 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Mon, 11 Jul 2016 18:43:38 +0530 Subject: [PATCH 13/31] remove line --- .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 844183f26ef2f..0f6c49e759590 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -404,5 +404,4 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(true), Row(true)) ) } - } From 3c5a7c8e5984aa87da0301ed9a6eb295b3d29695 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 18:11:40 +0530 Subject: [PATCH 14/31] move type checking to checkInputDataTypes instead of doing it in eval --- .../expressions/complexTypeCreator.scala | 43 ++++++++----------- .../spark/sql/StringFunctionsSuite.scala | 5 ++- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 34b019212e17b..8de0a9874dd86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -404,7 +404,7 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression "Default delimiters are ',' for pairDelim and ':' for keyValueDelim.", extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) - extends TernaryExpression with ExpectsInputTypes { + extends TernaryExpression { def this(child: Expression, pairDelim: Expression) = { this(child, pairDelim, Literal(":")) @@ -416,33 +416,28 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def children: Seq[Expression] = Seq(text, pairDelim, keyValueDelim) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) - override def dataType: DataType = MapType(StringType, StringType, valueContainsNull = false) - override def eval(input: InternalRow): Any = { - val exprs = children - val value1 = exprs(0).eval(input) - if (value1 != null) { - val value2 = exprs(1).eval(input) - if (value2 != null) { - val value3 = exprs(2).eval(input) - if (value3 != null) { - val array = value1.asInstanceOf[UTF8String] - .split(value2.asInstanceOf[UTF8String], -1) - .map { kv => - val arr = kv.split(value3.asInstanceOf[UTF8String], 2) - if(arr.length < 2) { - Array(arr(0), null) - } else { - arr - } - } - return ArrayBasedMapData(array.map(_(0)), array.map(_(1))) + override def checkInputDataTypes(): TypeCheckResult = { + if (children.map(_.dataType).forall(_ == StringType)) { + TypeCheckResult.TypeCheckSuccess + } else { + TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments should be string literal.") + } + } + + override def nullSafeEval(str: Any, delim1: Any, delim2: Any): Any = { + val array = str.asInstanceOf[UTF8String] + .split(delim1.asInstanceOf[UTF8String], -1) + .map { kv => + val arr = kv.split(delim2.asInstanceOf[UTF8String], 2) + if(arr.length < 2) { + Array(arr(0), null) + } else { + arr } } - } - throw new AnalysisException("All arguments should be a string literal.") + ArrayBasedMapData(array.map(_(0)), array.map(_(1))) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index d6d9e2797b642..966e305b7003f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,7 +417,8 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - assert(m1 == m2 && m2 == m3) - assert(m1.contains("All arguments should be a string literal.")) + println(Seq(m1, m2, m3)) // scalastyle:ignore + + assert(Seq(m1, m2, m3).forall(_.contains("all arguments should be string literal."))) } } From 1592ffca6506d4c1dcb099a8dc233d6184ac7d83 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 18:12:12 +0530 Subject: [PATCH 15/31] remove print --- .../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 966e305b7003f..387cc5026580e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,8 +417,6 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - println(Seq(m1, m2, m3)) // scalastyle:ignore - assert(Seq(m1, m2, m3).forall(_.contains("all arguments should be string literal."))) } } From e52c23b24284a8b24d9140bdb766984ff9b50769 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 18:48:50 +0530 Subject: [PATCH 16/31] add codegenFallback --- .../expressions/complexTypeCreator.scala | 41 ++----------------- .../spark/sql/StringFunctionsSuite.scala | 2 +- 2 files changed, 5 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 8de0a9874dd86..b45726063c5cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -404,7 +404,7 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression "Default delimiters are ',' for pairDelim and ':' for keyValueDelim.", extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) - extends TernaryExpression { + extends TernaryExpression with CodegenFallback{ def this(child: Expression, pairDelim: Expression) = { this(child, pairDelim, Literal(":")) @@ -422,7 +422,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E if (children.map(_.dataType).forall(_ == StringType)) { TypeCheckResult.TypeCheckSuccess } else { - TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments should be string literal.") + TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments should be of type string.") } } @@ -431,46 +431,13 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E .split(delim1.asInstanceOf[UTF8String], -1) .map { kv => val arr = kv.split(delim2.asInstanceOf[UTF8String], 2) - if(arr.length < 2) { + if (arr.length < 2) { Array(arr(0), null) } else { arr } } - ArrayBasedMapData(array.map(_(0)), array.map(_(1))) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - - nullSafeCodeGen(ctx, ev, (text, pairDelim, keyValueDelim) => { - val arrayClass = classOf[GenericArrayData].getName - val mapClass = classOf[ArrayBasedMapData].getName - - val keyArray = ctx.freshName("keyArray") - val valueArray = ctx.freshName("valueArray") - val tempArray = ctx.freshName("tempArray") - val keyValue = ctx.freshName("keyValue") - val i = ctx.freshName("i") - - s""" - UTF8String[] $tempArray = ($text).split($pairDelim, -1); - - UTF8String[] $keyArray = new UTF8String[$tempArray.length]; - UTF8String[] $valueArray = new UTF8String[$tempArray.length]; - - for (int $i = 0; $i < $tempArray.length; $i++) { - UTF8String[] $keyValue = ($tempArray[$i]).split($keyValueDelim, 2); - $keyArray[$i] = $keyValue[0]; - if ($keyValue.length < 2) { - $valueArray[$i] = null; - } else { - $valueArray[$i] = $keyValue[1]; - } - } - - ${ev.value} = new $mapClass(new $arrayClass($keyArray), new $arrayClass($valueArray)); - """ - }) + ArrayBasedMapData(array.map(_ (0)), array.map(_ (1))) } override def prettyName: String = "str_to_map" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 387cc5026580e..31857de24d0ce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,6 +417,6 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - assert(Seq(m1, m2, m3).forall(_.contains("all arguments should be string literal."))) + assert(Seq(m1, m2, m3).forall(_.contains("all arguments should be of type string."))) } } From 6be775a81469da666e14413e862a2e64cafe9e7b Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 18:54:59 +0530 Subject: [PATCH 17/31] foldable check for all --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index b45726063c5cf..096db0a7d40f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -421,8 +421,11 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def checkInputDataTypes(): TypeCheckResult = { if (children.map(_.dataType).forall(_ == StringType)) { TypeCheckResult.TypeCheckSuccess + } else if (!foldable) { + TypeCheckResult.TypeCheckFailure( + s"String To Map's arguments must be foldable, but got $children.") } else { - TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments should be of type string.") + TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") } } From b5ef2db2bdd6b3c03121aeeb3537f14a9e4c0927 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 21:19:52 +0530 Subject: [PATCH 18/31] foldable fix --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 096db0a7d40f0..8dd38180b27d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -421,7 +421,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def checkInputDataTypes(): TypeCheckResult = { if (children.map(_.dataType).forall(_ == StringType)) { TypeCheckResult.TypeCheckSuccess - } else if (!foldable) { + } else if (Seq(pairDelim, keyValueDelim).exists(!_.foldable)) { TypeCheckResult.TypeCheckFailure( s"String To Map's arguments must be foldable, but got $children.") } else { From d14dc0c402aa6a68b6467afe0853530268e58773 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 21:22:47 +0530 Subject: [PATCH 19/31] forall foldable and then else --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 8dd38180b27d1..c1738011dcf9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -421,11 +421,11 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def checkInputDataTypes(): TypeCheckResult = { if (children.map(_.dataType).forall(_ == StringType)) { TypeCheckResult.TypeCheckSuccess - } else if (Seq(pairDelim, keyValueDelim).exists(!_.foldable)) { + } else if (Seq(pairDelim, keyValueDelim).forall(_.foldable)) { + TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") + } else { TypeCheckResult.TypeCheckFailure( s"String To Map's arguments must be foldable, but got $children.") - } else { - TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") } } From 34db7bb26b0009ff29bd7e0100dbfb552e84ca21 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Tue, 12 Jul 2016 21:26:43 +0530 Subject: [PATCH 20/31] fix test --- .../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 31857de24d0ce..9e4413225d5c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,6 +417,6 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - assert(Seq(m1, m2, m3).forall(_.contains("all arguments should be of type string."))) + assert(Seq(m1, m2, m3).forall(_.contains("all arguments must be of type string."))) } } From 34db9cf5357e478d3f31133015fc8a9cbe14ace8 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 09:17:42 +0530 Subject: [PATCH 21/31] fix inputdatatype check --- .../catalyst/expressions/complexTypeCreator.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index c1738011dcf9c..6e7d27e276f61 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -420,12 +420,14 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def checkInputDataTypes(): TypeCheckResult = { if (children.map(_.dataType).forall(_ == StringType)) { - TypeCheckResult.TypeCheckSuccess - } else if (Seq(pairDelim, keyValueDelim).forall(_.foldable)) { - TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") + if (Seq(pairDelim, keyValueDelim).forall(_.foldable)) { + TypeCheckResult.TypeCheckSuccess + } else { + TypeCheckResult.TypeCheckFailure( + s"String To Map's arguments must be foldable, but got $children.") + } } else { - TypeCheckResult.TypeCheckFailure( - s"String To Map's arguments must be foldable, but got $children.") + TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") } } From 0da97da2a1c7596e45b7e1675cf5975fa7beb360 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 09:28:13 +0530 Subject: [PATCH 22/31] remove extra , --- .../scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 59bb30acd944e..c59ac3dcafea4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -238,6 +238,6 @@ private[sql] class HiveSessionCatalog( "hash", "histogram_numeric", "percentile", - "percentile_approx", + "percentile_approx" ) } From 1e604e8a461caa96fff6089ddb46a3201716ea86 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 10:08:55 +0530 Subject: [PATCH 23/31] use pretty name --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 6e7d27e276f61..28c39213d025c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -424,10 +424,10 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - s"String To Map's arguments must be foldable, but got $children.") + s"$prettyName's arguments must be foldable, but got $children.") } } else { - TypeCheckResult.TypeCheckFailure(s"String To Map's all arguments must be of type string.") + TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") } } From 539ae185600fd19fa492c8637d4e366bc7bbdc5c Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 20:23:30 +0530 Subject: [PATCH 24/31] both delim fix --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 28c39213d025c..2c96c818b322a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -427,7 +427,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E s"$prettyName's arguments must be foldable, but got $children.") } } else { - TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") + TypeCheckResult.TypeCheckFailure(s"$prettyName's both delimiters must be of type string.") } } From f2727a79a010e60669e412f7f79868be987e3713 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 20:36:34 +0530 Subject: [PATCH 25/31] add argument type checking tests --- .../sql/catalyst/expressions/ComplexTypeSuite.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 9afcbd51fd53e..a06fcb45ebbb3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -267,5 +267,14 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { val s4 = Literal("a:1_b:2_c:3") val m4 = Map("a" -> "1", "b" -> "2", "c" -> "3") checkEvaluation(new StringToMap(s4, Literal("_")), m4) + + // arguments checking + assert(new StringToMap(Literal("a:1,b:2,c:3")).checkInputDataTypes().isSuccess) + assert(new StringToMap(Literal(null)).checkInputDataTypes().isFailure) + assert(new StringToMap(Literal("a:1,b:2,c:3"), Literal(null)).checkInputDataTypes().isFailure) + assert(StringToMap(Literal("a:1,b:2,c:3"), Literal(null), Literal(null)) + .checkInputDataTypes().isFailure) + assert(new StringToMap(Literal(null), Literal(null)).checkInputDataTypes().isFailure) + } } From cbc87985985798bee7cbfe8382487153b9b075f8 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 20:40:09 +0530 Subject: [PATCH 26/31] fix test --- .../test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 9e4413225d5c2..42dc5412d0655 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,6 +417,6 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - assert(Seq(m1, m2, m3).forall(_.contains("all arguments must be of type string."))) + assert(Seq(m1, m2, m3).forall(_.contains("both delimiters must be of type string."))) } } From c7059880c2ce771a380dec9d3821dbc497fd501e Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 22:38:38 +0530 Subject: [PATCH 27/31] fix error --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 4 ++-- .../scala/org/apache/spark/sql/StringFunctionsSuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 2c96c818b322a..b723aa8df9271 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -424,10 +424,10 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - s"$prettyName's arguments must be foldable, but got $children.") + s"$prettyName's delimeters must be foldable, but got $children.") } } else { - TypeCheckResult.TypeCheckFailure(s"$prettyName's both delimiters must be of type string.") + TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 42dc5412d0655..9e4413225d5c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -417,6 +417,6 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { sql("select str_to_map(null,null)").collect() }.getMessage - assert(Seq(m1, m2, m3).forall(_.contains("both delimiters must be of type string."))) + assert(Seq(m1, m2, m3).forall(_.contains("all arguments must be of type string."))) } } From e701716de63b4c00d971bd6c307c447b501e5353 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 22:39:36 +0530 Subject: [PATCH 28/31] fix typo --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index b723aa8df9271..54be7186be44d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -396,7 +396,7 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression } /** - * Creates a map after splitting the input text into key/value pairs using delimeters + * Creates a map after splitting the input text into key/value pairs using delimiters */ @ExpressionDescription( usage = "_FUNC_(text[, pairDelim, keyValueDelim]) - Creates a map after splitting the text " + @@ -424,7 +424,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - s"$prettyName's delimeters must be foldable, but got $children.") + s"$prettyName's delimiters must be foldable, but got $children.") } } else { TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") From 8172bd55b2738bd246637dc9048361526f8a283b Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 13 Jul 2016 22:44:54 +0530 Subject: [PATCH 29/31] remove end2end tests --- .../org/apache/spark/sql/StringFunctionsSuite.scala | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 9e4413225d5c2..524926e1e9b66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -406,17 +406,5 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) ) - // All arguments should be string literals. - val m1 = intercept[AnalysisException]{ - sql("select str_to_map('a:1,b:2,c:3',null,null)").collect() - }.getMessage - val m2 = intercept[AnalysisException]{ - sql("select str_to_map('a:1,b:2,c:3',null)").collect() - }.getMessage - val m3 = intercept[AnalysisException]{ - sql("select str_to_map(null,null)").collect() - }.getMessage - - assert(Seq(m1, m2, m3).forall(_.contains("all arguments must be of type string."))) } } From 1e3577962805a7f7041e5f07af35d6425f93ac90 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Mon, 18 Jul 2016 20:43:28 +0530 Subject: [PATCH 30/31] address comments --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- .../spark/sql/catalyst/expressions/ComplexTypeSuite.scala | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 54be7186be44d..93d2456139b44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -424,7 +424,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - s"$prettyName's delimiters must be foldable, but got $children.") + s"$prettyName's delimiters must be foldable.") } } else { TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index a06fcb45ebbb3..0c307b2b8576b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -276,5 +276,10 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { .checkInputDataTypes().isFailure) assert(new StringToMap(Literal(null), Literal(null)).checkInputDataTypes().isFailure) + assert(new StringToMap(Literal("a:1_b:2_c:3"), NonFoldableLiteral("_")) + .checkInputDataTypes().isFailure) + assert( + new StringToMap(Literal("a=1_b=2_c=3"), Literal("_"), NonFoldableLiteral("=")) + .checkInputDataTypes().isFailure) } } From 8aabd3725a97fab3c45e19ba2b4f35b533a5d9d5 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Thu, 21 Jul 2016 22:35:42 +0530 Subject: [PATCH 31/31] extend expectsInputtypes --- .../expressions/complexTypeCreator.scala | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 93d2456139b44..b3c5c585c5a52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -404,7 +403,7 @@ case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression "Default delimiters are ',' for pairDelim and ':' for keyValueDelim.", extended = """ > SELECT _FUNC_('a:1,b:2,c:3',',',':');\n map("a":"1","b":"2","c":"3") """) case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) - extends TernaryExpression with CodegenFallback{ + extends TernaryExpression with CodegenFallback with ExpectsInputTypes { def this(child: Expression, pairDelim: Expression) = { this(child, pairDelim, Literal(":")) @@ -416,18 +415,15 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def children: Seq[Expression] = Seq(text, pairDelim, keyValueDelim) + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) + override def dataType: DataType = MapType(StringType, StringType, valueContainsNull = false) override def checkInputDataTypes(): TypeCheckResult = { - if (children.map(_.dataType).forall(_ == StringType)) { - if (Seq(pairDelim, keyValueDelim).forall(_.foldable)) { - TypeCheckResult.TypeCheckSuccess - } else { - TypeCheckResult.TypeCheckFailure( - s"$prettyName's delimiters must be foldable.") - } + if (Seq(pairDelim, keyValueDelim).exists(! _.foldable)) { + TypeCheckResult.TypeCheckFailure(s"$prettyName's delimiters must be foldable.") } else { - TypeCheckResult.TypeCheckFailure(s"$prettyName's all arguments must be of type string.") + super.checkInputDataTypes() } }