From 794829c57f12b2b303c717f3364929a285634993 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 14 Mar 2021 11:29:54 +0900 Subject: [PATCH] [SPARK-34727][SQL] Fix discrepancy in casting float to timestamp ### What changes were proposed in this pull request? In non-ANSI mode, casting float to timestamp has different implementation for codegen on and off. Codegen on: 1. Multiply float input by MICROS_PER_SECOND 2. Cast resulting float value to long Codegen off: 1. CAST float input to double input 2. Multiply double input by MICROS_PER_SECOND 3. Cast resulting double value to long In the PR, I propose to align to non-codegen code, and cast input float to double in codegen. ### Why are the changes needed? This fixes the issue which is demonstrated by the code: ```sql spark-sql> CREATE TEMP VIEW v1 AS SELECT 16777215.0f AS f; spark-sql> SELECT * FROM v1; 1.6777215E7 spark-sql> SELECT CAST(f AS TIMESTAMP) FROM v1; 1970-07-14 07:20:15 spark-sql> CACHE TABLE v1; spark-sql> SELECT * FROM v1; 1.6777215E7 spark-sql> SELECT CAST(f AS TIMESTAMP) FROM v1; 1970-07-14 07:20:14.951424 ``` The result from the cached view **1970-07-14 07:20:14.951424** is different from un-cached view **1970-07-14 07:20:15**. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the example above outputs the same timestamp for the cached view: ```sql spark-sql> CACHE TABLE v1; spark-sql> SELECT * FROM v1; 1.6777215E7 spark-sql> SELECT CAST(f AS TIMESTAMP) FROM v1; 1970-07-14 07:20:15 ``` ### How was this patch tested? By running new test: ``` $ build/sbt "test:testOnly *CastSuite" ``` Closes #31819 from MaxGekk/fix-float-to-timestamp. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 72bd9ca4d3d1c..80cd6d6c86312 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1301,7 +1301,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (Float.isNaN($c) || Float.isInfinite($c)) { $evNull = true; } else { - $evPrim = (long)($c * $MICROS_PER_SECOND); + $evPrim = (long)((double)$c * $MICROS_PER_SECOND); } """ } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index dae5304653743..d435aa0eb4c05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1507,6 +1507,10 @@ class CastSuite extends CastSuiteBase { test("Cast from double II") { checkEvaluation(cast(cast(1.toDouble, TimestampType), DoubleType), 1.toDouble) } + + test("SPARK-34727: cast from float II") { + checkCast(16777215.0f, java.time.Instant.ofEpochSecond(16777215)) + } } /**