From 20b8e77cea6a45d0a4944288cb78fce280fb754d Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 10 Feb 2017 14:01:16 +0900 Subject: [PATCH 1/4] Improve error message when some column types are compatible and others are not in set/union operations --- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../sql/catalyst/analysis/TypeCoercion.scala | 2 +- .../catalyst/analysis/AnalysisErrorSuite.scala | 15 +++++++++++++++ .../sql/catalyst/analysis/TestRelations.scala | 7 +++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index b4a7c05ee0fd1..ab0c050ebca9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -321,7 +321,7 @@ trait CheckAnalysis extends PredicateHelper { // Check if the data types match. dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) => // SPARK-18058: we shall not care about the nullability of columns - if (!dt1.sameType(dt2)) { + if (TypeCoercion.findWiderTypeForTwo(dt1.asNullable, dt2.asNullable).isEmpty) { failAnalysis( s""" |${operator.nodeName} can only be performed on tables with the compatible diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index b636c31703152..5ae40dd17c4e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -116,7 +116,7 @@ object TypeCoercion { * i.e. 
the main difference with [[findTightestCommonType]] is that here we allow some * loss of precision when widening decimal and double, and promotion to string. */ - private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { + def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { case (t1: DecimalType, t2: DecimalType) => Some(DecimalPrecision.widerDecimalType(t1, t2)) case (t: IntegralType, d: DecimalType) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 96aff37a4b4f9..757da2209846d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -282,16 +282,31 @@ class AnalysisErrorSuite extends AnalysisTest { testRelation.union(nestedRelation), "union" :: "the compatible column types" :: Nil) + errorTest( + "union with a incompatible column type and compatible column types", + testRelation3.union(testRelation4), + "union" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + errorTest( "intersect with incompatible column types", testRelation.intersect(nestedRelation), "intersect" :: "the compatible column types" :: Nil) + errorTest( + "intersect with a incompatible column type and compatible column types", + testRelation3.intersect(testRelation4), + "intersect" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + errorTest( "except with incompatible column types", testRelation.except(nestedRelation), "except" :: "the compatible column types" :: Nil) + errorTest( + "except with a incompatible column type and compatible column types", + testRelation3.except(testRelation4), + "except" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + 
errorTest( "SPARK-9955: correct error message for aggregate", // When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala index 3741a6ba95a86..e12e272aedffe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala @@ -37,6 +37,13 @@ object TestRelations { AttributeReference("g", DoubleType)(), AttributeReference("h", DecimalType(10, 2))()) + // This is the same as `testRelation3`, except that only `h` has an incompatible type. + val testRelation4 = LocalRelation( + AttributeReference("e", StringType)(), + AttributeReference("f", StringType)(), + AttributeReference("g", StringType)(), + AttributeReference("h", MapType(IntegerType, IntegerType))()) + val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: From ad2fe5f61bb452f95d314c6b144b1f211eee06dc Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 11 Feb 2017 20:22:46 +0900 Subject: [PATCH 2/4] Use simpleString to make the error messages more readable --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ab0c050ebca9c..fc7806f443532 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -325,8 +325,8 @@ trait CheckAnalysis extends PredicateHelper { 
failAnalysis( s""" |${operator.nodeName} can only be performed on tables with the compatible - |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of - |the ${ordinalNumber(ti + 1)} table + |column types. ${dt1.simpleString} <> ${dt2.simpleString} at the + |${ordinalNumber(ci)} column of the ${ordinalNumber(ti + 1)} table """.stripMargin.replace("\n", " ").trim()) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 757da2209846d..c5e877d12811c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -285,7 +285,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "union with a incompatible column type and compatible column types", testRelation3.union(testRelation4), - "union" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + "union" :: "the compatible column types" :: "map" :: "decimal" :: Nil) errorTest( "intersect with incompatible column types", @@ -295,7 +295,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "intersect with a incompatible column type and compatible column types", testRelation3.intersect(testRelation4), - "intersect" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + "intersect" :: "the compatible column types" :: "map" :: "decimal" :: Nil) errorTest( "except with incompatible column types", @@ -305,7 +305,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "except with a incompatible column type and compatible column types", testRelation3.except(testRelation4), - "except" :: "the compatible column types" :: "MapType" :: "DecimalType" :: Nil) + "except" :: "the compatible column types" :: "map" :: "decimal" :: Nil) errorTest( "SPARK-9955: correct error 
message for aggregate", From 03ec9dea7ddb923b2095c8e373f21d640a1300fd Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 12 Feb 2017 13:46:14 +0900 Subject: [PATCH 3/4] Add access modifier, private[analysis], to findWiderTypeForTwo --- .../sql/catalyst/analysis/TypeCoercion.scala | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 5ae40dd17c4e7..dfaac92e04a2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -116,17 +116,19 @@ object TypeCoercion { * i.e. the main difference with [[findTightestCommonType]] is that here we allow some * loss of precision when widening decimal and double, and promotion to string. */ - def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match { - case (t1: DecimalType, t2: DecimalType) => - Some(DecimalPrecision.widerDecimalType(t1, t2)) - case (t: IntegralType, d: DecimalType) => - Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d)) - case (d: DecimalType, t: IntegralType) => - Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d)) - case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) => - Some(DoubleType) - case _ => - findTightestCommonTypeToString(t1, t2) + private[analysis] def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = { + (t1, t2) match { + case (t1: DecimalType, t2: DecimalType) => + Some(DecimalPrecision.widerDecimalType(t1, t2)) + case (t: IntegralType, d: DecimalType) => + Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d)) + case (d: DecimalType, t: IntegralType) => + Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d)) + case (_: 
FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) => + Some(DoubleType) + case _ => + findTightestCommonTypeToString(t1, t2) + } } private def findWiderCommonType(types: Seq[DataType]) = { From 4ef5c07dc4bc7b4ba28f3fa2dcc13ad731b2e779 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 13 Feb 2017 21:13:26 +0900 Subject: [PATCH 4/4] Use catalogString instead of simpleString --- .../org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index fc7806f443532..532ecb8757e9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -325,7 +325,7 @@ trait CheckAnalysis extends PredicateHelper { failAnalysis( s""" |${operator.nodeName} can only be performed on tables with the compatible - |column types. ${dt1.simpleString} <> ${dt2.simpleString} at the + |column types. ${dt1.catalogString} <> ${dt2.catalogString} at the |${ordinalNumber(ci)} column of the ${ordinalNumber(ti + 1)} table """.stripMargin.replace("\n", " ").trim()) }