-
Notifications
You must be signed in to change notification settings - Fork 28.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-32424][SQL][3.0] Fix silent data change for timestamp parsing …
…if overflow happens
- Loading branch information
Showing
18 changed files
with
680 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
sql/core/src/test/resources/sql-tests/inputs/datetime-parsing-invalid.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
--- TESTS FOR DATETIME PARSING FUNCTIONS WITH INVALID VALUES --- | ||
|
||
-- parsing invalid value with pattern 'y' | ||
select to_timestamp('294248', 'y'); -- out of year value range [0, 294247] | ||
select to_timestamp('1', 'yy'); -- the number of digits must be 2 for 'yy'. | ||
select to_timestamp('-12', 'yy'); -- out of year value range [0, 99] for reduced two digit form | ||
select to_timestamp('123', 'yy'); -- the number of digits must be 2 for 'yy'. | ||
select to_timestamp('1', 'yyy'); -- the number of digits must be in [3, 6] for 'yyy' | ||
|
||
select to_timestamp('1234567', 'yyyyyyy'); -- the length of 'y' pattern must be less than 7 | ||
|
||
-- parsing invalid values with pattern 'D' | ||
select to_timestamp('366', 'D'); | ||
select to_timestamp('9', 'DD'); | ||
-- in java 8 this case is invalid, but valid in java 11, disabled for jenkins | ||
-- select to_timestamp('100', 'DD'); | ||
select to_timestamp('366', 'DD'); | ||
select to_timestamp('9', 'DDD'); | ||
select to_timestamp('99', 'DDD'); | ||
select to_timestamp('30-365', 'dd-DDD'); | ||
select to_timestamp('11-365', 'MM-DDD'); | ||
select to_timestamp('2019-366', 'yyyy-DDD'); | ||
select to_timestamp('12-30-365', 'MM-dd-DDD'); | ||
select to_timestamp('2020-01-365', 'yyyy-dd-DDD'); | ||
select to_timestamp('2020-10-350', 'yyyy-MM-DDD'); | ||
select to_timestamp('2020-11-31-366', 'yyyy-MM-dd-DDD'); | ||
-- add a special case to test csv: because the legacy formatter it uses is lenient, Spark should | ||
-- throw SparkUpgradeException | ||
select from_csv('2018-366', 'date Date', map('dateFormat', 'yyyy-DDD')) |
44 changes: 44 additions & 0 deletions
44
sql/core/src/test/resources/sql-tests/inputs/datetime-parsing.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
--- TESTS FOR DATETIME PARSING FUNCTIONS --- | ||
|
||
-- parsing with pattern 'y'. | ||
-- the range of valid year is [-290307, 294247], | ||
-- but particularly, some thrift clients use java.sql.Timestamp to parse timestamps, which allows | ||
-- only positive year values less than or equal to 9999. So the cases below only use [1, 9999] to pass | ||
-- ThriftServerQueryTestSuite | ||
select to_timestamp('1', 'y'); | ||
select to_timestamp('009999', 'y'); | ||
|
||
-- reduced two digit form is used, the range of valid year is 20-[00, 99] | ||
select to_timestamp('00', 'yy'); | ||
select to_timestamp('99', 'yy'); | ||
|
||
-- the range of valid year is [-290307, 294247], the number of digits must be in [3, 6] for 'yyy' | ||
select to_timestamp('001', 'yyy'); | ||
select to_timestamp('009999', 'yyy'); | ||
|
||
-- the range of valid year is [-9999, 9999], the number of digits must be 4 for 'yyyy'. | ||
select to_timestamp('0001', 'yyyy'); | ||
select to_timestamp('9999', 'yyyy'); | ||
|
||
-- the range of valid year is [-99999, 99999], the number of digits must be 5 for 'yyyyy'. | ||
select to_timestamp('00001', 'yyyyy'); | ||
select to_timestamp('09999', 'yyyyy'); | ||
|
||
-- the range of valid year is [-290307, 294247], the number of digits must be 6 for 'yyyyyy'. | ||
select to_timestamp('000001', 'yyyyyy'); | ||
select to_timestamp('009999', 'yyyyyy'); | ||
|
||
-- parsing with pattern 'D' | ||
select to_timestamp('9', 'D'); | ||
select to_timestamp('300', 'D'); | ||
select to_timestamp('09', 'DD'); | ||
select to_timestamp('99', 'DD'); | ||
select to_timestamp('009', 'DDD'); | ||
select to_timestamp('365', 'DDD'); | ||
select to_timestamp('31-365', 'dd-DDD'); | ||
select to_timestamp('12-365', 'MM-DDD'); | ||
select to_timestamp('2020-365', 'yyyy-DDD'); | ||
select to_timestamp('12-31-365', 'MM-dd-DDD'); | ||
select to_timestamp('2020-30-365', 'yyyy-dd-DDD'); | ||
select to_timestamp('2020-12-350', 'yyyy-MM-DDD'); | ||
select to_timestamp('2020-12-31-366', 'yyyy-MM-dd-DDD'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.