From b612f88b00b60df08d5bb5e688ed74a2bb2e7c9f Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 27 Jul 2020 17:03:14 +0000 Subject: [PATCH] [SPARK-32424][SQL][3.0] Fix silent data change for timestamp parsing if overflow happens --- docs/sql-ref-datetime-pattern.md | 2 +- .../util/DateTimeFormatterHelper.scala | 4 +- .../catalyst/util/TimestampFormatter.scala | 3 +- .../util/TimestampFormatterSuite.scala | 10 + .../inputs/datetime-formatting-invalid.sql | 4 +- .../sql-tests/inputs/datetime-formatting.sql | 2 +- .../inputs/datetime-parsing-invalid.sql | 29 +++ .../sql-tests/inputs/datetime-parsing.sql | 44 ++++ .../resources/sql-tests/inputs/datetime.sql | 4 - .../sql-tests/results/ansi/datetime.sql.out | 28 +-- .../datetime-formatting-invalid.sql.out | 4 +- .../datetime-formatting-legacy.sql.out | 18 +- .../results/datetime-formatting.sql.out | 18 +- .../sql-tests/results/datetime-legacy.sql.out | 26 +-- .../results/datetime-parsing-invalid.sql.out | 162 ++++++++++++++ .../results/datetime-parsing-legacy.sql.out | 202 ++++++++++++++++++ .../results/datetime-parsing.sql.out | 202 ++++++++++++++++++ .../sql-tests/results/datetime.sql.out | 28 +-- 18 files changed, 680 insertions(+), 110 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/datetime-parsing.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/datetime-parsing-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/datetime-parsing.sql.out diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md index 3c0bc754f940b..d0299e5a99dc2 100644 --- a/docs/sql-ref-datetime-pattern.md +++ b/docs/sql-ref-datetime-pattern.md @@ -70,7 +70,7 @@ The count of pattern letters determines the format. For formatting, the fraction length would be padded to the number of contiguous 'S' with zeros. Spark supports datetime of micro-of-second precision, which has up to 6 significant digits, but can parse nano-of-second with exceeded part truncated. -- Year: The count of letters determines the minimum field width below which padding is used. If the count of letters is two, then a reduced two digit form is used. For printing, this outputs the rightmost two digits. For parsing, this will parse using the base value of 2000, resulting in a year within the range 2000 to 2099 inclusive. If the count of letters is less than four (but not two), then the sign is only output for negative years. Otherwise, the sign is output if the pad width is exceeded when 'G' is not present. 11 or more letters will fail. +- Year: The count of letters determines the minimum field width below which padding is used. If the count of letters is two, then a reduced two digit form is used. For printing, this outputs the rightmost two digits. For parsing, this will parse using the base value of 2000, resulting in a year within the range 2000 to 2099 inclusive. If the count of letters is less than four (but not two), then the sign is only output for negative years. Otherwise, the sign is output if the pad width is exceeded when 'G' is not present. 7 or more letters will fail. - Month: It follows the rule of Number/Text. The text form is depend on letters - 'M' denotes the 'standard' form, and 'L' is for 'stand-alone' form. These two forms are different only in some certain languages. For example, in Russian, 'Июль' is the stand-alone form of July, and 'Июля' is the standard form. Here are examples for all supported pattern letters: - `'M'` or `'L'`: Month number in a year starting from 1. There is no difference between 'M' and 'L'. Month from 1 to 9 are printed without padding. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 992a2b12a462f..25ce33de17d32 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -276,7 +276,9 @@ private object DateTimeFormatterHelper { // unchecked `ArrayIndexOutOfBoundsException` by the `NumberPrinterParser` for formatting. It // makes the call side difficult to handle exceptions and easily leads to silent data change // because of the exceptions being suppressed. - Seq("y").map(_ * 11) + // SPARK-32424: The max year that we can actually handle is 6 digits, otherwise, it will + // overflow + Seq("y").map(_ * 7) }.toSet /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 87d8a3e682b8e..093ebb27e0dc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -24,7 +24,6 @@ import java.time.format.{DateTimeFormatter, DateTimeParseException} import java.time.temporal.ChronoField.MICRO_OF_SECOND import java.time.temporal.TemporalQueries import java.util.{Calendar, GregorianCalendar, Locale, TimeZone} -import java.util.concurrent.TimeUnit.SECONDS import org.apache.commons.lang3.time.FastDateFormat @@ -83,7 +82,7 @@ class Iso8601TimestampFormatter( val epochSeconds = zonedDateTime.toEpochSecond val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND) - Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond) + Math.addExact(Math.multiplyExact(epochSeconds, MICROS_PER_SECOND), microsOfSecond) } catch checkParsedDiff(s, legacyFormatter.parse) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 88f6b0d79f85e..5339da5f5b054 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -430,4 +430,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(formatter.format(date(1970, 4, 10)) == "100") } } + + test("SPARK-32424: avoid silent data change when timestamp overflows") { + val formatter = TimestampFormatter("y", UTC, isParsing = true) + assert(formatter.parse("294247") === date(294247)) + assert(formatter.parse("-290307") === date(-290307)) + val e1 = intercept[ArithmeticException](formatter.parse("294248")) + assert(e1.getMessage === "long overflow") + val e2 = intercept[ArithmeticException](formatter.parse("-290308")) + assert(e2.getMessage === "long overflow") + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql index 9072aa107f252..11bba00e91abf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting-invalid.sql @@ -3,8 +3,8 @@ -- separating this from datetime-formatting.sql, because the text form -- for patterns with 5 letters in SimpleDateFormat varies from different JDKs select date_format('2018-11-17 13:33:33.333', 'GGGGG'); --- pattern letter count can not be greater than 10 -select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy'); +-- pattern letter count can not be greater than 6 +select date_format('2018-11-17 13:33:33.333', 'yyyyyyy'); -- q/L in JDK 8 will fail when the count is more than 2 select date_format('2018-11-17 13:33:33.333', 'qqqqq'); select date_format('2018-11-17 13:33:33.333', 'QQQQQ'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql index 3b23a7785f6cd..2d70326f4f3c0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-formatting.sql @@ -11,7 +11,7 @@ create temporary view v as select col from values select col, date_format(col, 'G GG GGG GGGG') from v; -select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v; +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy') from v; select col, date_format(col, 'q qq') from v; diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql new file mode 100644 index 0000000000000..a1c02eaa3b0a0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql @@ -0,0 +1,29 @@ +--- TESTS FOR DATETIME PARSING FUNCTIONS WITH INVALID VALUES --- + +-- parsing invalid value with pattern 'y' +select to_timestamp('294248', 'y'); -- out of year value range [0, 294247] +select to_timestamp('1', 'yy'); -- the number of digits must be 2 for 'yy'. +select to_timestamp('-12', 'yy'); -- out of year value range [0, 99] for reduced two digit form +select to_timestamp('123', 'yy'); -- the number of digits must be 2 for 'yy'. +select to_timestamp('1', 'yyy'); -- the number of digits must be in [3, 6] for 'yyy' + +select to_timestamp('1234567', 'yyyyyyy'); -- the length of 'y' pattern must be less than 7 + +-- parsing invalid values with pattern 'D' +select to_timestamp('366', 'D'); +select to_timestamp('9', 'DD'); +-- in java 8 this case is invalid, but valid in java 11, disabled for jenkins +-- select to_timestamp('100', 'DD'); +select to_timestamp('366', 'DD'); +select to_timestamp('9', 'DDD'); +select to_timestamp('99', 'DDD'); +select to_timestamp('30-365', 'dd-DDD'); +select to_timestamp('11-365', 'MM-DDD'); +select to_timestamp('2019-366', 'yyyy-DDD'); +select to_timestamp('12-30-365', 'MM-dd-DDD'); +select to_timestamp('2020-01-365', 'yyyy-dd-DDD'); +select to_timestamp('2020-10-350', 'yyyy-MM-DDD'); +select to_timestamp('2020-11-31-366', 'yyyy-MM-dd-DDD'); +-- add a special case to test csv, because the legacy formatter it uses is lenient then Spark should +-- throw SparkUpgradeException +select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing.sql new file mode 100644 index 0000000000000..e058bd675c375 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime-parsing.sql @@ -0,0 +1,44 @@ +--- TESTS FOR DATETIME PARSING FUNCTIONS --- + +-- parsing with pattern 'y'. +-- the range of valid year is [-290307, 294247], +-- but particularly, some thrift client use java.sql.Timestamp to parse timestamp, which allows +-- only positive year values less or equal than 9999. So the cases bellow only use [1, 9999] to pass +-- ThriftServerQueryTestSuite +select to_timestamp('1', 'y'); +select to_timestamp('009999', 'y'); + +-- reduced two digit form is used, the range of valid year is 20-[01, 99] +select to_timestamp('00', 'yy'); +select to_timestamp('99', 'yy'); + +-- the range of valid year is [-290307, 294247], the number of digits must be in [3, 6] for 'yyy' +select to_timestamp('001', 'yyy'); +select to_timestamp('009999', 'yyy'); + +-- the range of valid year is [-9999, 9999], the number of digits must be 4 for 'yyyy'. +select to_timestamp('0001', 'yyyy'); +select to_timestamp('9999', 'yyyy'); + +-- the range of valid year is [-99999, 99999], the number of digits must be 5 for 'yyyyy'. +select to_timestamp('00001', 'yyyyy'); +select to_timestamp('09999', 'yyyyy'); + +-- the range of valid year is [-290307, 294247], the number of digits must be 6 for 'yyyyyy'. +select to_timestamp('000001', 'yyyyyy'); +select to_timestamp('009999', 'yyyyyy'); + +-- parsing with pattern 'D' +select to_timestamp('9', 'D'); +select to_timestamp('300', 'D'); +select to_timestamp('09', 'DD'); +select to_timestamp('99', 'DD'); +select to_timestamp('009', 'DDD'); +select to_timestamp('365', 'DDD'); +select to_timestamp('31-365', 'dd-DDD'); +select to_timestamp('12-365', 'MM-DDD'); +select to_timestamp('2020-365', 'yyyy-DDD'); +select to_timestamp('12-31-365', 'MM-dd-DDD'); +select to_timestamp('2020-30-365', 'yyyy-dd-DDD'); +select to_timestamp('2020-12-350', 'yyyy-MM-DDD'); +select to_timestamp('2020-12-31-366', 'yyyy-MM-dd-DDD'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index e4a2e5d8c8f3f..db54b5508ec71 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -146,7 +146,3 @@ select from_json('{"time":"26/October/2015"}', 'time Timestamp', map('timestampF select from_json('{"date":"26/October/2015"}', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'time Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy')); - -select from_unixtime(1, 'yyyyyyyyyyy-MM-dd'); -select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss'); -select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 1cb1aaa6584db..36674040f1302 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 109 +-- Number of queries: 106 -- !query @@ -913,29 +913,3 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select from_unixtime(1, 'yyyyyyyyyyy-MM-dd') --- !query schema -struct<> --- !query output -org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss') --- !query schema -struct --- !query output -0000002018-11-17 13:33:33 - - --- !query -select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd') --- !query schema -struct<> --- !query output -org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out index 248157efacde0..18d1a10068794 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out @@ -12,12 +12,12 @@ You may get a different result due to the upgrading of Spark 3.0: Fail to recogn -- !query -select date_format('2018-11-17 13:33:33.333', 'yyyyyyyyyyy') +select date_format('2018-11-17 13:33:33.333', 'yyyyyyy') -- !query schema struct<> -- !query output org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out index b7bc448a952a7..b37922b20807d 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-legacy.sql.out @@ -32,17 +32,17 @@ struct -- !query -select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy') from v -- !query schema -struct +struct -- !query output -1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 0001582 00001582 000001582 0000001582 -1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 0001969 00001969 000001969 0000001969 -1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 0001970 00001970 000001970 0000001970 -1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 0001996 00001996 000001996 0000001996 -2018-11-17 05:33:33.123 2018 18 2018 2018 02018 002018 0002018 00002018 000002018 0000002018 -2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 0002019 00002019 000002019 0000002019 -2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 0002100 00002100 000002100 0000002100 +1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 +1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 +1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 +1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 +2018-11-17 05:33:33.123 2018 18 2018 2018 02018 002018 +2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 +2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out index f724658d354df..5bed88e168f1e 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting.sql.out @@ -32,17 +32,17 @@ struct -- !query -select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy yyyyyyyy yyyyyyyyy yyyyyyyyyy') from v +select col, date_format(col, 'y yy yyy yyyy yyyyy yyyyyy') from v -- !query schema -struct +struct -- !query output -1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 0001582 00001582 000001582 0000001582 -1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 0001969 00001969 000001969 0000001969 -1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 0001970 00001970 000001970 0000001970 -1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 0001996 00001996 000001996 0000001996 -2018-11-17 05:33:33.123 2018 18 2018 2018 02018 002018 0002018 00002018 000002018 0000002018 -2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 0002019 00002019 000002019 0000002019 -2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 0002100 00002100 000002100 0000002100 +1582-05-31 19:40:35.123 1582 82 1582 1582 01582 001582 +1969-12-31 15:00:00 1969 69 1969 1969 01969 001969 +1970-12-31 04:59:59.999 1970 70 1970 1970 01970 001970 +1996-03-31 07:03:33.123 1996 96 1996 1996 01996 001996 +2018-11-17 05:33:33.123 2018 18 2018 2018 02018 002018 +2019-12-31 09:33:33.123 2019 19 2019 2019 02019 002019 +2100-01-01 01:33:33.123 2100 00 2100 2100 02100 002100 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index b244f497aeacc..1342c71feecfa 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 109 +-- Number of queries: 106 -- !query @@ -870,27 +870,3 @@ select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy struct> -- !query output {"date":2015-10-26} - - --- !query -select from_unixtime(1, 'yyyyyyyyyyy-MM-dd') --- !query schema -struct --- !query output -00000001969-12-31 - - --- !query -select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss') --- !query schema -struct --- !query output -0000002018-11-17 13:33:33 - - --- !query -select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd') --- !query schema -struct --- !query output -00000002018-11-17 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out new file mode 100644 index 0000000000000..5dd68a54025ef --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out @@ -0,0 +1,162 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 19 + + +-- !query +select to_timestamp('294248', 'y') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('1', 'yy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('-12', 'yy') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('123', 'yy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '123' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('1', 'yyy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '1' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('1234567', 'yyyyyyy') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_timestamp('366', 'D') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('9', 'DD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('366', 'DD') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('9', 'DDD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '9' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('99', 'DDD') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '99' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. + + +-- !query +select to_timestamp('30-365', 'dd-DDD') +-- !query schema +struct +-- !query output +1970-01-30 00:00:00 + + +-- !query +select to_timestamp('11-365', 'MM-DDD') +-- !query schema +struct +-- !query output +1970-11-01 00:00:00 + + +-- !query +select to_timestamp('2019-366', 'yyyy-DDD') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('12-30-365', 'MM-dd-DDD') +-- !query schema +struct +-- !query output +1970-12-30 00:00:00 + + +-- !query +select to_timestamp('2020-01-365', 'yyyy-dd-DDD') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('2020-10-350', 'yyyy-MM-DDD') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp('2020-11-31-366', 'yyyy-MM-dd-DDD') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUpgradeException +You may get a different result due to the upgrading of Spark 3.0: Fail to parse '2018-366' in the new parser. You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string. diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-legacy.sql.out new file mode 100644 index 0000000000000..bb7ce74a29ef5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-legacy.sql.out @@ -0,0 +1,202 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query +select to_timestamp('1', 'y') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'y') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('00', 'yy') +-- !query schema +struct +-- !query output +2000-01-01 00:00:00 + + +-- !query +select to_timestamp('99', 'yy') +-- !query schema +struct +-- !query output +1999-01-01 00:00:00 + + +-- !query +select to_timestamp('001', 'yyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'yyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('0001', 'yyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('9999', 'yyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('00001', 'yyyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('09999', 'yyyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('000001', 'yyyyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'yyyyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('9', 'D') +-- !query schema +struct +-- !query output +1970-01-09 00:00:00 + + +-- !query +select to_timestamp('300', 'D') +-- !query schema +struct +-- !query output +1970-10-27 00:00:00 + + +-- !query +select to_timestamp('09', 'DD') +-- !query schema +struct +-- !query output +1970-01-09 00:00:00 + + +-- !query +select to_timestamp('99', 'DD') +-- !query schema +struct +-- !query output +1970-04-09 00:00:00 + + +-- !query +select to_timestamp('009', 'DDD') +-- !query schema +struct +-- !query output +1970-01-09 00:00:00 + + +-- !query +select to_timestamp('365', 'DDD') +-- !query schema +struct +-- !query output +1970-12-31 00:00:00 + + +-- !query +select to_timestamp('31-365', 'dd-DDD') +-- !query schema +struct +-- !query output +1970-12-31 00:00:00 + + +-- !query +select to_timestamp('12-365', 'MM-DDD') +-- !query schema +struct +-- !query output +1970-12-31 00:00:00 + + +-- !query +select to_timestamp('2020-365', 'yyyy-DDD') +-- !query schema +struct +-- !query output +2020-12-30 00:00:00 + + +-- !query +select to_timestamp('12-31-365', 'MM-dd-DDD') +-- !query schema +struct +-- !query output +1970-12-31 00:00:00 + + +-- !query +select to_timestamp('2020-30-365', 'yyyy-dd-DDD') +-- !query schema +struct +-- !query output +2020-12-30 00:00:00 + + +-- !query +select to_timestamp('2020-12-350', 'yyyy-MM-DDD') +-- !query schema +struct +-- !query output +2020-12-15 00:00:00 + + +-- !query +select to_timestamp('2020-12-31-366', 'yyyy-MM-dd-DDD') +-- !query schema +struct +-- !query output +2020-12-31 00:00:00 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing.sql.out new file mode 100644 index 0000000000000..c50bbcc8252b5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing.sql.out @@ -0,0 +1,202 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query +select to_timestamp('1', 'y') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'y') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('00', 'yy') +-- !query schema +struct +-- !query output +2000-01-01 00:00:00 + + +-- !query +select to_timestamp('99', 'yy') +-- !query schema +struct +-- !query output +2099-01-01 00:00:00 + + +-- !query +select to_timestamp('001', 'yyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'yyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('0001', 'yyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('9999', 'yyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('00001', 'yyyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('09999', 'yyyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('000001', 'yyyyyy') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +select to_timestamp('009999', 'yyyyyy') +-- !query schema +struct +-- !query output +9999-01-01 00:00:00 + + +-- !query +select to_timestamp('9', 'D') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('300', 'D') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('09', 'DD') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('99', 'DD') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('009', 'DDD') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('365', 'DDD') +-- !query schema +struct +-- !query output +1970-01-01 00:00:00 + + +-- !query +select to_timestamp('31-365', 'dd-DDD') +-- !query schema +struct +-- !query output +1970-01-31 00:00:00 + + +-- !query +select to_timestamp('12-365', 'MM-DDD') +-- !query schema +struct +-- !query output +1970-12-01 00:00:00 + + +-- !query +select to_timestamp('2020-365', 'yyyy-DDD') +-- !query schema +struct +-- !query output +2020-12-30 00:00:00 + + +-- !query +select to_timestamp('12-31-365', 'MM-dd-DDD') +-- !query schema +struct +-- !query output +1970-12-31 00:00:00 + + +-- !query +select to_timestamp('2020-30-365', 'yyyy-dd-DDD') +-- !query schema +struct +-- !query output +2020-12-30 00:00:00 + + +-- !query +select to_timestamp('2020-12-350', 'yyyy-MM-DDD') +-- !query schema +struct +-- !query output +2020-12-15 00:00:00 + + +-- !query +select to_timestamp('2020-12-31-366', 'yyyy-MM-dd-DDD') +-- !query schema +struct +-- !query output +2020-12-31 00:00:00 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 3255761545fdd..d3657e874343b 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 109 +-- Number of queries: 106 -- !query @@ -885,29 +885,3 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select from_unixtime(1, 'yyyyyyyyyyy-MM-dd') --- !query schema -struct<> --- !query output -org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - - --- !query -select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss') --- !query schema -struct --- !query output -0000002018-11-17 13:33:33 - - --- !query -select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd') --- !query schema -struct<> --- !query output -org.apache.spark.SparkUpgradeException -You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html