From 66b6bd5b7e1538d60915b62fa1155dcd86f3411a Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 11 Sep 2018 14:16:56 +0800 Subject: [PATCH 1/2] [SPARK-25371][SQL] struct() should allow being called with 0 args SPARK-21281 introduced a check for the inputs of `CreateNamedStructLike` to be non-empty. This means that `struct()`, which was previously considered valid, now throws an Exception. This behavior change was introduced in 2.3.0. The change may break users' applications on upgrade and it causes `VectorAssembler` to fail when an empty `inputCols` is defined. This PR removes the added check, making `struct()` valid again. Tested by adding a unit test. Closes #22373 from mgaido91/SPARK-25371. Authored-by: Marco Gaido Signed-off-by: Wenchen Fan --- .../apache/spark/ml/feature/VectorAssemblerSuite.scala | 8 ++++++++ .../sql/catalyst/expressions/complexTypeCreator.scala | 5 +---- .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 2 -- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index eca065f7e775d..76cdaf738579d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -147,4 +147,12 @@ class VectorAssemblerSuite .filter(vectorUDF($"features") > 1) .count() == 1) } + + test("SPARK-25371: VectorAssembler with empty inputCols") { + val inputDF = Seq( + (1, Vectors.dense(1.0, 2.0)), (2, Vectors.sparse(2, Array(1), Array(3.0)))).toDF("i", "v") + val vectorAssembler = new VectorAssembler().setInputCols(Array()).setOutputCol("a") + val output = vectorAssembler.transform(inputDF) + assert(output.select("a").limit(1).collect().head == Row(Vectors.sparse(0, Seq.empty))) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 047b80ac5289c..1f87ee711ade4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -305,10 +305,7 @@ trait CreateNamedStructLike extends Expression { } override def checkInputDataTypes(): TypeCheckResult = { - if (children.length < 1) { - TypeCheckResult.TypeCheckFailure( - s"input to function $prettyName requires at least one argument") - } else if (children.size % 2 != 0) { + if (children.size % 2 != 0) { TypeCheckResult.TypeCheckFailure(s"$prettyName expects an even number of arguments.") } else { val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType == StringType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 50e475984f458..67d8ccb4a5bda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -465,8 +465,6 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { val funcsMustHaveAtLeastOneArg = ("coalesce", (df: DataFrame) => df.select(coalesce())) :: ("coalesce", (df: DataFrame) => df.selectExpr("coalesce()")) :: - ("named_struct", (df: DataFrame) => df.select(struct())) :: - ("named_struct", (df: DataFrame) => df.selectExpr("named_struct()")) :: ("hash", (df: DataFrame) => df.select(hash())) :: ("hash", (df: DataFrame) => df.selectExpr("hash()")) :: Nil funcsMustHaveAtLeastOneArg.foreach { case (name, func) => From 482367a690f611fa25c8ee45fbf3a1cef9b2b204 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 12 Sep 2018 10:15:08 +0200 Subject: [PATCH 2/2] address comment --- .../org/apache/spark/ml/feature/VectorAssemblerSuite.scala | 
4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index 76cdaf738579d..96e3a60fa9f6b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -149,10 +149,8 @@ class VectorAssemblerSuite } test("SPARK-25371: VectorAssembler with empty inputCols") { - val inputDF = Seq( - (1, Vectors.dense(1.0, 2.0)), (2, Vectors.sparse(2, Array(1), Array(3.0)))).toDF("i", "v") val vectorAssembler = new VectorAssembler().setInputCols(Array()).setOutputCol("a") - val output = vectorAssembler.transform(inputDF) + val output = vectorAssembler.transform(Seq(1).toDF("x")) assert(output.select("a").limit(1).collect().head == Row(Vectors.sparse(0, Seq.empty))) } }