From 78dc47827192d04c8ebfc54df404efea91e36f23 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 11 Sep 2020 09:16:12 +0900 Subject: [PATCH] [SPARK-32840][SQL][3.0] Invalid interval value can happen to be just adhesive with the unit THIS PR backports https://github.com/apache/spark/pull/29708 to 3.0 ### What changes were proposed in this pull request? In this PR, we add a checker for STRING form interval value ahead for parsing multiple units intervals and fail directly if the interval value contains alphabets to prevent correctness issues like `interval '1 day 2' day`=`3 days`. ### Why are the changes needed? fix correctness issue ### Does this PR introduce _any_ user-facing change? yes, in spark 3.0.0 `interval '1 day 2' day`=`3 days` but now we fail with ParseException ### How was this patch tested? add a test. Closes #29716 from yaooqinn/SPARK-32840-30. Authored-by: Kent Yao Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/parser/AstBuilder.scala | 11 +++++- .../resources/sql-tests/inputs/interval.sql | 4 ++ .../sql-tests/results/ansi/interval.sql.out | 38 ++++++++++++++++++- .../sql-tests/results/interval.sql.out | 38 ++++++++++++++++++- 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b29fe211454f6..307a72eee0376 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2107,7 +2107,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val kvs = units.indices.map { i => val u = units(i).getText val v = if (values(i).STRING() != null) { - string(values(i).STRING()) + val value = string(values(i).STRING()) + // SPARK-32840: For invalid cases, e.g. INTERVAL '1 day 2' hour, + // INTERVAL 'interval 1' day, we need to check ahead before they are concatenated with + // units and become valid ones, e.g. '1 day 2 hour'. + // Ideally, we only ensure the value parts don't contain any units here. + if (value.exists(Character.isLetter)) { + throw new ParseException("Can only use numbers in the interval value part for" + + s" multiple unit value pairs interval form, but got invalid value: $value", ctx) + } + value } else { values(i).getText } diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 9ddefa5a34b87..9ad968ecda936 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -188,3 +188,7 @@ select interval '1.2'; select interval '- 2'; select interval '1 day -'; select interval '1 day 1'; + +select interval '1 day 2' day; +select interval 'interval 1' day; +select interval '-\t 1' day; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index eb84f3e87f053..5a66db960ccbd 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 100 +-- Number of queries: 103 -- !query @@ -1054,3 +1054,39 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7) == SQL == select interval '1 day 1' -------^^^ + + +-- !query +select interval '1 day 2' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16) + +== SQL == +select interval '1 day 2' day +----------------^^^ + + +-- !query +select interval 'interval 1' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16) + +== SQL == +select interval 'interval 1' day +----------------^^^ + + +-- !query +select interval '-\t 1' day +-- !query schema +struct +-- !query output +-1 days diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index c5dd1e001bbe0..baf7f16908a5b 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 100 +-- Number of queries: 103 -- !query @@ -1026,3 +1026,39 @@ Cannot parse the INTERVAL value: 1 day 1(line 1, pos 7) == SQL == select interval '1 day 1' -------^^^ + + +-- !query +select interval '1 day 2' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: 1 day 2(line 1, pos 16) + +== SQL == +select interval '1 day 2' day +----------------^^^ + + +-- !query +select interval 'interval 1' day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only use numbers in the interval value part for multiple unit value pairs interval form, but got invalid value: interval 1(line 1, pos 16) + +== SQL == +select interval 'interval 1' day +----------------^^^ + + +-- !query +select interval '-\t 1' day +-- !query schema +struct +-- !query output +-1 days