From 24655583f1cb5dae2e80bb572604fb4a9761ec07 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Thu, 29 Aug 2019 19:59:24 +0800
Subject: [PATCH] [SPARK-28495][SQL][FOLLOW-UP] Disallow conversions between
 timestamp and long in ANSI mode

### What changes were proposed in this pull request?

Disallow conversions between the `timestamp` type and the `long` type in table insertion with the ANSI store assignment policy.

### Why are the changes needed?

In PR https://github.com/apache/spark/pull/25581, the timestamp type was allowed to be converted to the long type, since timestamps are represented by long values internally and both legacy mode and strict mode allow the conversion. After reconsideration, I think we should disallow it, as per ANSI SQL section "4.4.2 Characteristics of numbers":

> A number is assignable only to sites of numeric type.

In PostgreSQL, the conversion between timestamp and long is also disallowed.

### Does this PR introduce any user-facing change?

Conversions between timestamp and long are disallowed in table insertion with the ANSI store assignment policy.

### How was this patch tested?

Unit test

Closes #25615 from gengliangwang/disallowTimeStampToLong.

Authored-by: Gengliang Wang
Signed-off-by: Wenchen Fan
---
 .../apache/spark/sql/catalyst/expressions/Cast.scala  |  9 +++++----
 .../sql/types/DataTypeWriteCompatibilitySuite.scala   | 12 ++++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 452f084e19bc2..baa98171e265f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -158,6 +158,11 @@ object Cast {
     case _ => false
   }
 
+  /**
+   * Returns true iff we can cast the `from` type to the `to` type as per the ANSI SQL standard.
+   * In practice, the behavior is mostly the same as PostgreSQL. It disallows certain unreasonable
+   * type conversions such as converting `string` to `int` or `double` to `boolean`.
+   */
   def canANSIStoreAssign(from: DataType, to: DataType): Boolean = (from, to) match {
     case _ if from == to => true
     case (_: NumericType, _: NumericType) => true
@@ -165,10 +170,6 @@
     case (_: CalendarIntervalType, StringType) => true
     case (DateType, TimestampType) => true
     case (TimestampType, DateType) => true
-    // Spark supports casting between long and timestamp, please see `longToTimestamp` and
-    // `timestampToLong` for details.
-    case (TimestampType, LongType) => true
-    case (LongType, TimestampType) => true
 
     case (ArrayType(fromType, fn), ArrayType(toType, tn)) =>
       resolvableNullability(fn, tn) && canANSIStoreAssign(fromType, toType)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
index 784cc7a70489f..9d6827194f004 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala
@@ -133,6 +133,18 @@ class ANSIDataTypeWriteCompatibilitySuite extends DataTypeWriteCompatibilityBase
       assert(err.contains("Cannot safely cast"))
     }
   }
+
+  test("Conversions between timestamp and long are not allowed") {
+    assertSingleError(LongType, TimestampType, "longToTimestamp",
+      "Should not allow long to timestamp") { err =>
+      assert(err.contains("Cannot safely cast 'longToTimestamp': LongType to TimestampType"))
+    }
+
+    assertSingleError(TimestampType, LongType, "timestampToLong",
+      "Should not allow timestamp to long") { err =>
+      assert(err.contains("Cannot safely cast 'timestampToLong': TimestampType to LongType"))
+    }
+  }
 }
 
 abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
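
For illustration, here is a minimal sketch of the resulting `Cast.canANSIStoreAssign` behavior. The object wrapper below is made up for the example; the assertions follow the match cases visible in the diff above and the error messages checked by the new test.

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// Hypothetical example object, not part of the patch.
object AnsiStoreAssignExample extends App {
  // With this change, timestamp <-> long is no longer a safe store assignment.
  assert(!Cast.canANSIStoreAssign(LongType, TimestampType))
  assert(!Cast.canANSIStoreAssign(TimestampType, LongType))

  // Conversions covered by the remaining cases are still allowed,
  // e.g. numeric-to-numeric widening and date-to-timestamp.
  assert(Cast.canANSIStoreAssign(IntegerType, LongType))
  assert(Cast.canANSIStoreAssign(DateType, TimestampType))
}
```

Under the ANSI store assignment policy, inserting a long column into a timestamp column (or vice versa) is therefore rejected with a "Cannot safely cast" error, as exercised by the new test in DataTypeWriteCompatibilitySuite.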