From b1b2f7c95461a43b3d718c40e34690d9ab7934f8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 01:15:01 +0800 Subject: [PATCH 01/12] [SPARK-29605][SQL][FOLLOWUP] Fix cast error when there are spaces between signs and values --- .../sql/catalyst/util/IntervalUtils.scala | 21 ++++++++++++------- .../catalyst/util/IntervalUtilsSuite.scala | 1 + 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index dcd38cee1d2b8..1a25b9b0e9e46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -409,9 +409,12 @@ object IntervalUtils { } private object ParseState extends Enumeration { + type ParseState = Value + val PREFIX, BEGIN_VALUE, PARSE_SIGN, + TRIM_VALUE, PARSE_UNIT_VALUE, FRACTIONAL_PART, BEGIN_UNIT_NAME, @@ -439,7 +442,7 @@ object IntervalUtils { val s = input.trim.toLowerCase // scalastyle:on val bytes = s.getBytes - if (bytes.length == 0) { + if (bytes.isEmpty) { return null } var state = PREFIX @@ -452,6 +455,13 @@ object IntervalUtils { var fractionScale: Int = 0 var fraction: Int = 0 + def trimToNextState(b: Byte, next: ParseState): Unit = { + b match { + case ' ' => i += 1 + case _ => state = next + } + } + while (i < bytes.length) { val b = bytes(i) state match { @@ -464,11 +474,7 @@ object IntervalUtils { } } state = BEGIN_VALUE - case BEGIN_VALUE => - b match { - case ' ' => i += 1 - case _ => state = PARSE_SIGN - } + case BEGIN_VALUE => trimToNextState(b, PARSE_SIGN) case PARSE_SIGN => b match { case '-' => @@ -486,7 +492,8 @@ object IntervalUtils { // Sets the scale to an invalid value to track fraction presence // in the BEGIN_UNIT_NAME state fractionScale = -1 - state = PARSE_UNIT_VALUE + state = TRIM_VALUE + case TRIM_VALUE => trimToNextState(b, PARSE_UNIT_VALUE) case PARSE_UNIT_VALUE => b match { case _ if '0' <= b && b <= '9' => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 721fb822ed94f..7f2e93c9f04ac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -78,6 +78,7 @@ class IntervalUtilsSuite extends SparkFunSuite { "-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1), " 123 MONTHS 123 DAYS 123 Microsecond " -> new CalendarInterval(123, 123, 123), "interval -1 day +3 Microseconds" -> new CalendarInterval(0, -1, 3), + "interval - 1 day + 3 Microseconds" -> new CalendarInterval(0, -1, 3), " interval 8 years -11 months 123 weeks -1 day " + "23 hours -22 minutes 1 second -123 millisecond 567 microseconds " -> new CalendarInterval(85, 860, 81480877567L)).foreach { case (input, expected) => From 8f0dba04b33e325ea9c32d4f708fa962d10ccda1 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 01:41:57 +0800 Subject: [PATCH 02/12] benchmark and unit tests --- .../benchmarks/IntervalBenchmark-results.txt | 48 ++++++++------- .../resources/sql-tests/inputs/interval.sql | 9 +++ .../sql-tests/results/interval.sql.out | 58 ++++++++++++++++++- .../benchmark/IntervalBenchmark.scala | 3 +- 4 files changed, 94 insertions(+), 24 deletions(-) diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index 78cf66447a81d..7a45233fe0cca 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -1,25 +1,29 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.1 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Java HotSpot(TM) 64-Bit Server VM 1.8.0_65-b17 on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 422 437 16 2.4 421.8 1.0X -prepare string w/o interval 369 374 8 2.7 369.4 1.1X -1 units w/ interval 426 430 5 2.3 425.5 1.0X -1 units w/o interval 382 386 5 2.6 382.1 1.1X -2 units w/ interval 519 527 9 1.9 518.5 0.8X -2 units w/o interval 505 512 6 2.0 505.4 0.8X -3 units w/ interval 650 653 3 1.5 649.6 0.6X -3 units w/o interval 630 633 4 1.6 629.7 0.7X -4 units w/ interval 755 761 6 1.3 754.9 0.6X -4 units w/o interval 745 749 3 1.3 745.3 0.6X -5 units w/ interval 882 891 14 1.1 882.0 0.5X -5 units w/o interval 867 870 3 1.2 867.4 0.5X -6 units w/ interval 1008 1013 4 1.0 1008.2 0.4X -6 units w/o interval 990 995 5 1.0 990.4 0.4X -7 units w/ interval 1057 1063 6 0.9 1056.9 0.4X -7 units w/o interval 1042 1046 4 1.0 1042.3 0.4X -8 units w/ interval 1206 1208 2 0.8 1206.0 0.3X -8 units w/o interval 1194 1198 4 0.8 1194.1 0.4X -9 units w/ interval 1322 1324 3 0.8 1321.5 0.3X -9 units w/o interval 1314 1318 4 0.8 1313.6 0.3X +prepare string w/ interval 483 562 68 2.1 483.4 1.0X +prepare string w/o interval 444 451 8 2.3 443.7 1.1X +1 units w/ interval 481 508 25 2.1 480.6 1.0X +1 units w/o interval 428 453 22 2.3 427.7 1.1X +2 units w/ interval 613 664 47 1.6 613.2 0.8X +2 units w/o interval 598 632 32 1.7 597.9 0.8X +3 units w/ interval 1127 1159 35 0.9 1127.5 0.4X +3 units w/o interval 1116 1131 18 0.9 1116.2 0.4X +4 units w/ interval 1263 1276 12 0.8 1263.3 0.4X +4 units w/o interval 1288 1310 29 0.8 1288.0 0.4X +5 units w/ interval 1391 1411 25 0.7 1391.1 0.3X +5 units w/o interval 1384 1405 20 0.7 1384.2 0.3X +6 units w/ interval 1528 1537 13 0.7 1527.7 0.3X +6 units w/o interval 1550 1550 1 0.6 1549.6 0.3X +7 units w/ interval 1789 1797 12 0.6 1789.1 0.3X +7 units w/o interval 1756 1851 83 0.6 1755.8 0.3X +8 units w/ interval 1860 1867 10 0.5 1859.7 0.3X +8 units w/o interval 1848 1853 6 0.5 1848.5 0.3X +9 units w/ interval 1928 1938 9 0.5 1928.0 0.3X +9 units w/o interval 1918 1924 5 0.5 1918.3 0.3X +10 units w/ interval 2078 2088 10 0.5 2078.3 0.2X +10 units w/o interval 2073 2084 20 0.5 2072.7 0.2X +11 units w/ interval 2207 2221 12 0.5 2207.2 0.2X +11 units w/o interval 2213 2223 16 0.5 2213.4 0.2X diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 7babe05ef3e6c..38dc0abd35240 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -41,3 +41,12 @@ select max(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 se -- min select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 seconds') t(v); + +-- SPARK-29605: cast string to intervals +select cast(v as interval) from values ('1 second') t(v); +select cast(v as interval) from values ('+1 second') t(v); +select cast(v as interval) from values ('-1 second') t(v); +select cast(v as interval) from values ('+ 1 second') t(v); +select cast(v as interval) from values ('- 1 second') t(v); +select cast(v as interval) from values ('- -1 second') t(v); +select cast(v as interval) from values ('- +1 second') t(v); diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 58de1331c1b3f..56add9ab66770 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 22 +-- Number of queries: 29 -- !query 0 @@ -178,3 +178,59 @@ select min(cast(v as interval)) from VALUES ('1 seconds'), ('4 seconds'), ('3 se struct -- !query 21 output 1 seconds + + +-- !query 22 +select cast(v as interval) from values ('1 second') t(v) +-- !query 22 schema +struct +-- !query 22 output +1 seconds + + +-- !query 23 +select cast(v as interval) from values ('+1 second') t(v) +-- !query 23 schema +struct +-- !query 23 output +1 seconds + + +-- !query 24 +select cast(v as interval) from values ('-1 second') t(v) +-- !query 24 schema +struct +-- !query 24 output +-1 seconds + + +-- !query 25 +select cast(v as interval) from values ('+ 1 second') t(v) +-- !query 25 schema +struct +-- !query 25 output +1 seconds + + +-- !query 26 +select cast(v as interval) from values ('- 1 second') t(v) +-- !query 26 schema +struct +-- !query 26 output +-1 seconds + + +-- !query 27 +select cast(v as interval) from values ('- -1 second') t(v) +-- !query 27 schema +struct +-- !query 27 output +NULL + + +-- !query 28 +select cast(v as interval) from values ('- +1 second') t(v) +-- !query 28 schema +struct +-- !query 28 output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index d75cb1040f31e..b9bb6f5febd7f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -83,7 +83,8 @@ object IntervalBenchmark extends SqlBasedBenchmark { override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 1000000 val timeUnits = Seq( - "13 months", "100 weeks", "9 days", "12 hours", + "13 months", " 1 months", + "100 weeks", "9 days", "12 hours", "- 3 hours", "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") val intervalToTest = ListBuffer[String]() From 925e8e7e7a21ca6cd8862e089e1136bb8450256b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 09:53:12 +0800 Subject: [PATCH 03/12] benchmark with jdk 1.8.0_231-b11 --- .../benchmarks/IntervalBenchmark-results.txt | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index 7a45233fe0cca..6ae5a4bd09f6d 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -1,29 +1,29 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_65-b17 on Mac OS X 10.14.6 +Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 483 562 68 2.1 483.4 1.0X -prepare string w/o interval 444 451 8 2.3 443.7 1.1X -1 units w/ interval 481 508 25 2.1 480.6 1.0X -1 units w/o interval 428 453 22 2.3 427.7 1.1X -2 units w/ interval 613 664 47 1.6 613.2 0.8X -2 units w/o interval 598 632 32 1.7 597.9 0.8X -3 units w/ interval 1127 1159 35 0.9 1127.5 0.4X -3 units w/o interval 1116 1131 18 0.9 1116.2 0.4X -4 units w/ interval 1263 1276 12 0.8 1263.3 0.4X -4 units w/o interval 1288 1310 29 0.8 1288.0 0.4X -5 units w/ interval 1391 1411 25 0.7 1391.1 0.3X -5 units w/o interval 1384 1405 20 0.7 1384.2 0.3X -6 units w/ interval 1528 1537 13 0.7 1527.7 0.3X -6 units w/o interval 1550 1550 1 0.6 1549.6 0.3X -7 units w/ interval 1789 1797 12 0.6 1789.1 0.3X -7 units w/o interval 1756 1851 83 0.6 1755.8 0.3X -8 units w/ interval 1860 1867 10 0.5 1859.7 0.3X -8 units w/o interval 1848 1853 6 0.5 1848.5 0.3X -9 units w/ interval 1928 1938 9 0.5 1928.0 0.3X -9 units w/o interval 1918 1924 5 0.5 1918.3 0.3X -10 units w/ interval 2078 2088 10 0.5 2078.3 0.2X -10 units w/o interval 2073 2084 20 0.5 2072.7 0.2X -11 units w/ interval 2207 2221 12 0.5 2207.2 0.2X -11 units w/o interval 2213 2223 16 0.5 2213.4 0.2X +prepare string w/ interval 531 566 34 1.9 530.5 1.0X +prepare string w/o interval 466 479 21 2.1 466.5 1.1X +1 units w/ interval 475 521 63 2.1 475.0 1.1X +1 units w/o interval 440 457 25 2.3 440.1 1.2X +2 units w/ interval 614 621 11 1.6 613.7 0.9X +2 units w/o interval 596 605 8 1.7 596.5 0.9X +3 units w/ interval 1115 1120 4 0.9 1115.0 0.5X +3 units w/o interval 1100 1107 6 0.9 1100.2 0.5X +4 units w/ interval 1255 1263 9 0.8 1255.1 0.4X +4 units w/o interval 1254 1393 130 0.8 1253.8 0.4X +5 units w/ interval 1367 1373 5 0.7 1367.2 0.4X +5 units w/o interval 1366 1376 9 0.7 1366.2 0.4X +6 units w/ interval 1526 1530 6 0.7 1526.0 0.3X +6 units w/o interval 1504 1510 7 0.7 1504.0 0.4X +7 units w/ interval 1748 1778 27 0.6 1748.0 0.3X +7 units w/o interval 1740 1744 5 0.6 1740.0 0.3X +8 units w/ interval 2092 2107 14 0.5 2092.5 0.3X +8 units w/o interval 2094 2098 5 0.5 2094.4 0.3X +9 units w/ interval 1874 1880 5 0.5 1873.9 0.3X +9 units w/o interval 1867 1872 4 0.5 1867.3 0.3X +10 units w/ interval 2127 2134 13 0.5 2126.5 0.2X +10 units w/o interval 2045 2049 6 0.5 2045.0 0.3X +11 units w/ interval 2242 2254 13 0.4 2241.9 0.2X +11 units w/o interval 2221 2227 6 0.5 2221.1 0.2X From 65e2a1c09746bb64acdc1e77dae542b91bba045c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 10:06:15 +0800 Subject: [PATCH 04/12] benchmark with jdk-11.0.5 --- .../IntervalBenchmark-jdk11-results.txt | 48 ++++++++++--------- .../benchmarks/IntervalBenchmark-results.txt | 29 ----------- 2 files changed, 26 insertions(+), 51 deletions(-) delete mode 100644 sql/core/benchmarks/IntervalBenchmark-results.txt diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt index 31fb7080260ef..ba3ab023f0e61 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -1,25 +1,29 @@ -OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 442 472 41 2.3 442.4 1.0X -prepare string w/o interval 420 423 6 2.4 419.6 1.1X -1 units w/ interval 350 359 9 2.9 349.8 1.3X -1 units w/o interval 316 317 1 3.2 316.4 1.4X -2 units w/ interval 457 459 2 2.2 457.0 1.0X -2 units w/o interval 432 435 3 2.3 432.2 1.0X -3 units w/ interval 610 613 3 1.6 609.8 0.7X -3 units w/o interval 581 583 2 1.7 580.5 0.8X -4 units w/ interval 720 724 4 1.4 720.4 0.6X -4 units w/o interval 699 704 8 1.4 699.4 0.6X -5 units w/ interval 850 850 0 1.2 849.9 0.5X -5 units w/o interval 829 832 5 1.2 828.7 0.5X -6 units w/ interval 927 932 4 1.1 927.1 0.5X -6 units w/o interval 891 892 1 1.1 890.5 0.5X -7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X -7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X -8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X -8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X -9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X -9 units w/o interval 1372 1381 14 0.7 1372.5 0.3X +prepare string w/ interval 707 930 269 1.4 706.8 1.0X +prepare string w/o interval 660 759 141 1.5 659.7 1.1X +1 units w/ interval 526 777 263 1.9 526.0 1.3X +1 units w/o interval 453 462 8 2.2 453.4 1.6X +2 units w/ interval 636 663 23 1.6 636.5 1.1X +2 units w/o interval 643 655 19 1.6 643.4 1.1X +3 units w/ interval 1160 1186 31 0.9 1160.4 0.6X +3 units w/o interval 1136 1212 76 0.9 1135.6 0.6X +4 units w/ interval 1316 1401 124 0.8 1316.4 0.5X +4 units w/o interval 1284 1297 11 0.8 1284.4 0.6X +5 units w/ interval 1404 1412 8 0.7 1404.0 0.5X +5 units w/o interval 1404 1407 3 0.7 1404.2 0.5X +6 units w/ interval 1585 1591 7 0.6 1585.1 0.4X +6 units w/o interval 1577 1580 3 0.6 1576.8 0.4X +7 units w/ interval 1798 1807 8 0.6 1797.6 0.4X +7 units w/o interval 1785 1794 15 0.6 1784.7 0.4X +8 units w/ interval 1939 1952 16 0.5 1939.2 0.4X +8 units w/o interval 1921 1926 6 0.5 1920.8 0.4X +9 units w/ interval 1965 1972 11 0.5 1964.5 0.4X +9 units w/o interval 1954 1964 14 0.5 1954.0 0.4X +10 units w/ interval 2107 2112 6 0.5 2107.5 0.3X +10 units w/o interval 2099 2107 7 0.5 2099.0 0.3X +11 units w/ interval 2297 2298 1 0.4 2296.8 0.3X +11 units w/o interval 2300 2349 84 0.4 2300.1 0.3X diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt deleted file mode 100644 index 6ae5a4bd09f6d..0000000000000 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ /dev/null @@ -1,29 +0,0 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 -Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz -cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -prepare string w/ interval 531 566 34 1.9 530.5 1.0X -prepare string w/o interval 466 479 21 2.1 466.5 1.1X -1 units w/ interval 475 521 63 2.1 475.0 1.1X -1 units w/o interval 440 457 25 2.3 440.1 1.2X -2 units w/ interval 614 621 11 1.6 613.7 0.9X -2 units w/o interval 596 605 8 1.7 596.5 0.9X -3 units w/ interval 1115 1120 4 0.9 1115.0 0.5X -3 units w/o interval 1100 1107 6 0.9 1100.2 0.5X -4 units w/ interval 1255 1263 9 0.8 1255.1 0.4X -4 units w/o interval 1254 1393 130 0.8 1253.8 0.4X -5 units w/ interval 1367 1373 5 0.7 1367.2 0.4X -5 units w/o interval 1366 1376 9 0.7 1366.2 0.4X -6 units w/ interval 1526 1530 6 0.7 1526.0 0.3X -6 units w/o interval 1504 1510 7 0.7 1504.0 0.4X -7 units w/ interval 1748 1778 27 0.6 1748.0 0.3X -7 units w/o interval 1740 1744 5 0.6 1740.0 0.3X -8 units w/ interval 2092 2107 14 0.5 2092.5 0.3X -8 units w/o interval 2094 2098 5 0.5 2094.4 0.3X -9 units w/ interval 1874 1880 5 0.5 1873.9 0.3X -9 units w/o interval 1867 1872 4 0.5 1867.3 0.3X -10 units w/ interval 2127 2134 13 0.5 2126.5 0.2X -10 units w/o interval 2045 2049 6 0.5 2045.0 0.3X -11 units w/ interval 2242 2254 13 0.4 2241.9 0.2X -11 units w/o interval 2221 2227 6 0.5 2221.1 0.2X - From 200fd5d175b617bcfe7615bbbe436783bf70c594 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 10:15:37 +0800 Subject: [PATCH 05/12] Revert "benchmark with jdk-11.0.5" This reverts commit 65e2a1c09746bb64acdc1e77dae542b91bba045c. --- .../IntervalBenchmark-jdk11-results.txt | 48 +++++++++---------- .../benchmarks/IntervalBenchmark-results.txt | 29 +++++++++++ 2 files changed, 51 insertions(+), 26 deletions(-) create mode 100644 sql/core/benchmarks/IntervalBenchmark-results.txt diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt index ba3ab023f0e61..31fb7080260ef 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -1,29 +1,25 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 -Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1 +Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 707 930 269 1.4 706.8 1.0X -prepare string w/o interval 660 759 141 1.5 659.7 1.1X -1 units w/ interval 526 777 263 1.9 526.0 1.3X -1 units w/o interval 453 462 8 2.2 453.4 1.6X -2 units w/ interval 636 663 23 1.6 636.5 1.1X -2 units w/o interval 643 655 19 1.6 643.4 1.1X -3 units w/ interval 1160 1186 31 0.9 1160.4 0.6X -3 units w/o interval 1136 1212 76 0.9 1135.6 0.6X -4 units w/ interval 1316 1401 124 0.8 1316.4 0.5X -4 units w/o interval 1284 1297 11 0.8 1284.4 0.6X -5 units w/ interval 1404 1412 8 0.7 1404.0 0.5X -5 units w/o interval 1404 1407 3 0.7 1404.2 0.5X -6 units w/ interval 1585 1591 7 0.6 1585.1 0.4X -6 units w/o interval 1577 1580 3 0.6 1576.8 0.4X -7 units w/ interval 1798 1807 8 0.6 1797.6 0.4X -7 units w/o interval 1785 1794 15 0.6 1784.7 0.4X -8 units w/ interval 1939 1952 16 0.5 1939.2 0.4X -8 units w/o interval 1921 1926 6 0.5 1920.8 0.4X -9 units w/ interval 1965 1972 11 0.5 1964.5 0.4X -9 units w/o interval 1954 1964 14 0.5 1954.0 0.4X -10 units w/ interval 2107 2112 6 0.5 2107.5 0.3X -10 units w/o interval 2099 2107 7 0.5 2099.0 0.3X -11 units w/ interval 2297 2298 1 0.4 2296.8 0.3X -11 units w/o interval 2300 2349 84 0.4 2300.1 0.3X +prepare string w/ interval 442 472 41 2.3 442.4 1.0X +prepare string w/o interval 420 423 6 2.4 419.6 1.1X +1 units w/ interval 350 359 9 2.9 349.8 1.3X +1 units w/o interval 316 317 1 3.2 316.4 1.4X +2 units w/ interval 457 459 2 2.2 457.0 1.0X +2 units w/o interval 432 435 3 2.3 432.2 1.0X +3 units w/ interval 610 613 3 1.6 609.8 0.7X +3 units w/o interval 581 583 2 1.7 580.5 0.8X +4 units w/ interval 720 724 4 1.4 720.4 0.6X +4 units w/o interval 699 704 8 1.4 699.4 0.6X +5 units w/ interval 850 850 0 1.2 849.9 0.5X +5 units w/o interval 829 832 5 1.2 828.7 0.5X +6 units w/ interval 927 932 4 1.1 927.1 0.5X +6 units w/o interval 891 892 1 1.1 890.5 0.5X +7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X +7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X +8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X +8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X +9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X +9 units w/o interval 1372 1381 14 0.7 1372.5 0.3X diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt new file mode 100644 index 0000000000000..6ae5a4bd09f6d --- /dev/null +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -0,0 +1,29 @@ +Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz +cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +prepare string w/ interval 531 566 34 1.9 530.5 1.0X +prepare string w/o interval 466 479 21 2.1 466.5 1.1X +1 units w/ interval 475 521 63 2.1 475.0 1.1X +1 units w/o interval 440 457 25 2.3 440.1 1.2X +2 units w/ interval 614 621 11 1.6 613.7 0.9X +2 units w/o interval 596 605 8 1.7 596.5 0.9X +3 units w/ interval 1115 1120 4 0.9 1115.0 0.5X +3 units w/o interval 1100 1107 6 0.9 1100.2 0.5X +4 units w/ interval 1255 1263 9 0.8 1255.1 0.4X +4 units w/o interval 1254 1393 130 0.8 1253.8 0.4X +5 units w/ interval 1367 1373 5 0.7 1367.2 0.4X +5 units w/o interval 1366 1376 9 0.7 1366.2 0.4X +6 units w/ interval 1526 1530 6 0.7 1526.0 0.3X +6 units w/o interval 1504 1510 7 0.7 1504.0 0.4X +7 units w/ interval 1748 1778 27 0.6 1748.0 0.3X +7 units w/o interval 1740 1744 5 0.6 1740.0 0.3X +8 units w/ interval 2092 2107 14 0.5 2092.5 0.3X +8 units w/o interval 2094 2098 5 0.5 2094.4 0.3X +9 units w/ interval 1874 1880 5 0.5 1873.9 0.3X +9 units w/o interval 1867 1872 4 0.5 1867.3 0.3X +10 units w/ interval 2127 2134 13 0.5 2126.5 0.2X +10 units w/o interval 2045 2049 6 0.5 2045.0 0.3X +11 units w/ interval 2242 2254 13 0.4 2241.9 0.2X +11 units w/o interval 2221 2227 6 0.5 2221.1 0.2X + From 8851f3538f69cd6f80ed207d0a4bdfeefd848fef Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 10 Nov 2019 10:49:36 +0800 Subject: [PATCH 06/12] fix jdk11 result --- .../IntervalBenchmark-jdk11-results.txt | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt index 31fb7080260ef..07dd8d5e44ea7 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -1,25 +1,29 @@ -OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 442 472 41 2.3 442.4 1.0X -prepare string w/o interval 420 423 6 2.4 419.6 1.1X -1 units w/ interval 350 359 9 2.9 349.8 1.3X -1 units w/o interval 316 317 1 3.2 316.4 1.4X -2 units w/ interval 457 459 2 2.2 457.0 1.0X -2 units w/o interval 432 435 3 2.3 432.2 1.0X -3 units w/ interval 610 613 3 1.6 609.8 0.7X -3 units w/o interval 581 583 2 1.7 580.5 0.8X -4 units w/ interval 720 724 4 1.4 720.4 0.6X -4 units w/o interval 699 704 8 1.4 699.4 0.6X -5 units w/ interval 850 850 0 1.2 849.9 0.5X -5 units w/o interval 829 832 5 1.2 828.7 0.5X -6 units w/ interval 927 932 4 1.1 927.1 0.5X -6 units w/o interval 891 892 1 1.1 890.5 0.5X -7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X -7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X -8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X -8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X -9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X -9 units w/o interval 1372 1381 14 0.7 1372.5 0.3X +prepare string w/ interval 574 610 45 1.7 573.9 1.0X +prepare string w/o interval 518 538 27 1.9 517.7 1.1X +1 units w/ interval 425 439 16 2.4 425.3 1.3X +1 units w/o interval 385 393 10 2.6 385.2 1.5X +2 units w/ interval 553 561 11 1.8 553.1 1.0X +2 units w/o interval 531 543 11 1.9 531.0 1.1X +3 units w/ interval 1134 1159 32 0.9 1134.0 0.5X +3 units w/o interval 1121 1126 6 0.9 1121.3 0.5X +4 units w/ interval 1226 1250 21 0.8 1226.1 0.5X +4 units w/o interval 1227 1239 11 0.8 1227.1 0.5X +5 units w/ interval 1375 1447 93 0.7 1374.7 0.4X +5 units w/o interval 1335 1346 19 0.7 1335.1 0.4X +6 units w/ interval 1530 1556 24 0.7 1529.5 0.4X +6 units w/o interval 1481 1492 17 0.7 1480.7 0.4X +7 units w/ interval 1730 1745 14 0.6 1729.9 0.3X +7 units w/o interval 1788 1859 112 0.6 1788.1 0.3X +8 units w/ interval 1952 2087 117 0.5 1951.7 0.3X +8 units w/o interval 2083 2207 209 0.5 2082.5 0.3X +9 units w/ interval 2228 2291 60 0.4 2227.5 0.3X +9 units w/o interval 2130 2184 75 0.5 2130.1 0.3X +10 units w/ interval 2414 2502 81 0.4 2413.8 0.2X +10 units w/o interval 2463 2488 35 0.4 2463.1 0.2X +11 units w/ interval 2717 2755 42 0.4 2716.8 0.2X +11 units w/o interval 2578 2661 77 0.4 2577.7 0.2X From 1e8272c033a248db6e02e442b729bd1683eeaab3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 10:02:35 +0800 Subject: [PATCH 07/12] regen golden file --- .../resources/sql-tests/inputs/interval.sql | 2 +- .../sql-tests/results/interval.sql.out | 58 ++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 0d7cdee7d0505..b91d63b181fec 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -66,7 +66,7 @@ select make_interval(1, 2, 3, 4, 5); select make_interval(1, 2, 3, 4, 5, 6); select make_interval(1, 2, 3, 4, 5, 6, 7.008009); --- SPARK-29605: cast string to intervals +-- cast string to intervals select cast(v as interval) from values ('1 second') t(v); select cast(v as interval) from values ('+1 second') t(v); select cast(v as interval) from values ('-1 second') t(v); diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 355a76d56559e..3dc453ccb4c18 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 38 +-- Number of queries: 45 -- !query 0 @@ -306,3 +306,59 @@ select make_interval(1, 2, 3, 4, 5, 6, 7.008009) struct -- !query 37 output 1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 38 +select cast(v as interval) from values ('1 second') t(v) +-- !query 38 schema +struct +-- !query 38 output +1 seconds + + +-- !query 39 +select cast(v as interval) from values ('+1 second') t(v) +-- !query 39 schema +struct +-- !query 39 output +1 seconds + + +-- !query 40 +select cast(v as interval) from values ('-1 second') t(v) +-- !query 40 schema +struct +-- !query 40 output +-1 seconds + + +-- !query 41 +select cast(v as interval) from values ('+ 1 second') t(v) +-- !query 41 schema +struct +-- !query 41 output +1 seconds + + +-- !query 42 +select cast(v as interval) from values ('- 1 second') t(v) +-- !query 42 schema +struct +-- !query 42 output +-1 seconds + + +-- !query 43 +select cast(v as interval) from values ('- -1 second') t(v) +-- !query 43 schema +struct +-- !query 43 output +NULL + + +-- !query 44 +select cast(v as interval) from values ('- +1 second') t(v) +-- !query 44 schema +struct +-- !query 44 output +NULL From cb8aa6b78f191f5a40595a96f58de0fb31ff5b5b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 14:21:09 +0800 Subject: [PATCH 08/12] add TRIM_UNIT --- .../sql/catalyst/util/IntervalUtils.scala | 113 +++++++++--------- 1 file changed, 55 insertions(+), 58 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 5fab08e692f11..b91b6bc8b9661 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -433,6 +433,7 @@ object IntervalUtils { TRIM_VALUE, PARSE_UNIT_VALUE, FRACTIONAL_PART, + TRIM_UNIT, BEGIN_UNIT_NAME, UNIT_NAME_SUFFIX, END_UNIT_NAME = Value @@ -521,8 +522,7 @@ object IntervalUtils { } catch { case _: ArithmeticException => return null } - case ' ' => - state = BEGIN_UNIT_NAME + case ' ' => state = TRIM_UNIT case '.' => fractionScale = (NANOS_PER_SECOND / 10).toInt state = FRACTIONAL_PART @@ -536,68 +536,65 @@ object IntervalUtils { fractionScale /= 10 case ' ' => fraction /= NANOS_PER_MICROS.toInt - state = BEGIN_UNIT_NAME + state = TRIM_UNIT case _ => return null } i += 1 + case TRIM_UNIT => trimToNextState(b, BEGIN_UNIT_NAME) case BEGIN_UNIT_NAME => - if (b == ' ') { - i += 1 - } else { - // Checks that only seconds can have the fractional part - if (b != 's' && fractionScale >= 0) { - return null - } - if (isNegative) { - currentValue = -currentValue - fraction = -fraction - } - try { - b match { - case 'y' if s.matchAt(yearStr, i) => - val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) - months = Math.toIntExact(Math.addExact(months, monthsInYears)) - i += yearStr.numBytes() - case 'w' if s.matchAt(weekStr, i) => - val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) - days = Math.toIntExact(Math.addExact(days, daysInWeeks)) - i += weekStr.numBytes() - case 'd' if s.matchAt(dayStr, i) => - days = Math.addExact(days, Math.toIntExact(currentValue)) - i += dayStr.numBytes() - case 'h' if s.matchAt(hourStr, i) => - val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) - microseconds = Math.addExact(microseconds, hoursUs) - i += hourStr.numBytes() - case 's' if s.matchAt(secondStr, i) => - val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) - microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) - i += secondStr.numBytes() - case 'm' => - if (s.matchAt(monthStr, i)) { - months = Math.addExact(months, Math.toIntExact(currentValue)) - i += monthStr.numBytes() - } else if (s.matchAt(minuteStr, i)) { - val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) - microseconds = Math.addExact(microseconds, minutesUs) - i += minuteStr.numBytes() - } else if (s.matchAt(millisStr, i)) { - val millisUs = Math.multiplyExact( - currentValue, - MICROS_PER_MILLIS) - microseconds = Math.addExact(microseconds, millisUs) - i += millisStr.numBytes() - } else if (s.matchAt(microsStr, i)) { - microseconds = Math.addExact(microseconds, currentValue) - i += microsStr.numBytes() - } else return null - case _ => return null - } - } catch { - case _: ArithmeticException => return null + // Checks that only seconds can have the fractional part + if (b != 's' && fractionScale >= 0) { + return null + } + if (isNegative) { + currentValue = -currentValue + fraction = -fraction + } + try { + b match { + case 'y' if s.matchAt(yearStr, i) => + val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) + months = Math.toIntExact(Math.addExact(months, monthsInYears)) + i += yearStr.numBytes() + case 'w' if s.matchAt(weekStr, i) => + val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) + days = Math.toIntExact(Math.addExact(days, daysInWeeks)) + i += weekStr.numBytes() + case 'd' if s.matchAt(dayStr, i) => + days = Math.addExact(days, Math.toIntExact(currentValue)) + i += dayStr.numBytes() + case 'h' if s.matchAt(hourStr, i) => + val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) + microseconds = Math.addExact(microseconds, hoursUs) + i += hourStr.numBytes() + case 's' if s.matchAt(secondStr, i) => + val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) + microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) + i += secondStr.numBytes() + case 'm' => + if (s.matchAt(monthStr, i)) { + months = Math.addExact(months, Math.toIntExact(currentValue)) + i += monthStr.numBytes() + } else if (s.matchAt(minuteStr, i)) { + val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) + microseconds = Math.addExact(microseconds, minutesUs) + i += minuteStr.numBytes() + } else if (s.matchAt(millisStr, i)) { + val millisUs = Math.multiplyExact( + currentValue, + MICROS_PER_MILLIS) + microseconds = Math.addExact(microseconds, millisUs) + i += millisStr.numBytes() + } else if (s.matchAt(microsStr, i)) { + microseconds = Math.addExact(microseconds, currentValue) + i += microsStr.numBytes() + } else return null + case _ => return null } - state = UNIT_NAME_SUFFIX + } catch { + case _: ArithmeticException => return null } + state = UNIT_NAME_SUFFIX case UNIT_NAME_SUFFIX => b match { case 's' => state = END_UNIT_NAME From 0a301a17fcc2e76a89f2f8ea1ecf21125b8b88f8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 15:05:20 +0800 Subject: [PATCH 09/12] naming --- .../spark/sql/catalyst/util/IntervalUtils.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index b91b6bc8b9661..25aba2f15dfd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -430,10 +430,10 @@ object IntervalUtils { val PREFIX, BEGIN_VALUE, PARSE_SIGN, - TRIM_VALUE, + TRIM_BEFORE_PARSE_VALUE, PARSE_UNIT_VALUE, FRACTIONAL_PART, - TRIM_UNIT, + TRIM_BEFORE_PARSE_UNIT, BEGIN_UNIT_NAME, UNIT_NAME_SUFFIX, END_UNIT_NAME = Value @@ -512,8 +512,8 @@ object IntervalUtils { // Sets the scale to an invalid value to track fraction presence // in the BEGIN_UNIT_NAME state fractionScale = -1 - state = TRIM_VALUE - case TRIM_VALUE => trimToNextState(b, PARSE_UNIT_VALUE) + state = TRIM_BEFORE_PARSE_VALUE + case TRIM_BEFORE_PARSE_VALUE => trimToNextState(b, PARSE_UNIT_VALUE) case PARSE_UNIT_VALUE => b match { case _ if '0' <= b && b <= '9' => @@ -522,7 +522,7 @@ object IntervalUtils { } catch { case _: ArithmeticException => return null } - case ' ' => state = TRIM_UNIT + case ' ' => state = TRIM_BEFORE_PARSE_UNIT case '.' => fractionScale = (NANOS_PER_SECOND / 10).toInt state = FRACTIONAL_PART @@ -536,11 +536,11 @@ object IntervalUtils { fractionScale /= 10 case ' ' => fraction /= NANOS_PER_MICROS.toInt - state = TRIM_UNIT + state = TRIM_BEFORE_PARSE_UNIT case _ => return null } i += 1 - case TRIM_UNIT => trimToNextState(b, BEGIN_UNIT_NAME) + case TRIM_BEFORE_PARSE_UNIT => trimToNextState(b, BEGIN_UNIT_NAME) case BEGIN_UNIT_NAME => // Checks that only seconds can have the fractional part if (b != 's' && fractionScale >= 0) { From 83b9ae36b09a3636ee3e1761ee5b4415ed21a8af Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 15:34:55 +0800 Subject: [PATCH 10/12] naming again --- .../sql/catalyst/util/IntervalUtils.scala | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 25aba2f15dfd3..39f5b1fb34e2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -428,15 +428,15 @@ object IntervalUtils { type ParseState = Value val PREFIX, - BEGIN_VALUE, - PARSE_SIGN, - TRIM_BEFORE_PARSE_VALUE, - PARSE_UNIT_VALUE, - FRACTIONAL_PART, - TRIM_BEFORE_PARSE_UNIT, - BEGIN_UNIT_NAME, - UNIT_NAME_SUFFIX, - END_UNIT_NAME = Value + NEXT_VALUE_UNIT, + SIGN, + TRIM_BEFORE_VALUE, + VALUE, + VALUE_FRACTIONAL_PART, + TRIM_BEFORE_UNIT, + UNIT_BEGIN, + UNIT_SUFFIX, + UNIT_END = Value } private final val intervalStr = UTF8String.fromString("interval ") private def unitToUtf8(unit: IntervalUnit): UTF8String = { @@ -493,9 +493,9 @@ object IntervalUtils { i += intervalStr.numBytes() } } - state = BEGIN_VALUE - case BEGIN_VALUE => trimToNextState(b, PARSE_SIGN) - case PARSE_SIGN => + state = NEXT_VALUE_UNIT + case NEXT_VALUE_UNIT => trimToNextState(b, SIGN) + case SIGN => b match { case '-' => isNegative = true @@ -512,9 +512,9 @@ object IntervalUtils { // Sets the scale to an invalid value to track fraction presence // in the BEGIN_UNIT_NAME state fractionScale = -1 - state = TRIM_BEFORE_PARSE_VALUE - case TRIM_BEFORE_PARSE_VALUE => trimToNextState(b, PARSE_UNIT_VALUE) - case PARSE_UNIT_VALUE => + state = TRIM_BEFORE_VALUE + case TRIM_BEFORE_VALUE => trimToNextState(b, VALUE) + case VALUE => b match { case _ if '0' <= b && b <= '9' => try { @@ -522,26 +522,26 @@ object IntervalUtils { } catch { case _: ArithmeticException => return null } - case ' ' => state = TRIM_BEFORE_PARSE_UNIT + case ' ' => state = TRIM_BEFORE_UNIT case '.' => fractionScale = (NANOS_PER_SECOND / 10).toInt - state = FRACTIONAL_PART + state = VALUE_FRACTIONAL_PART case _ => return null } i += 1 - case FRACTIONAL_PART => + case VALUE_FRACTIONAL_PART => b match { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 case ' ' => fraction /= NANOS_PER_MICROS.toInt - state = TRIM_BEFORE_PARSE_UNIT + state = TRIM_BEFORE_UNIT case _ => return null } i += 1 - case TRIM_BEFORE_PARSE_UNIT => trimToNextState(b, BEGIN_UNIT_NAME) - case BEGIN_UNIT_NAME => + case TRIM_BEFORE_UNIT => trimToNextState(b, UNIT_BEGIN) + case UNIT_BEGIN => // Checks that only seconds can have the fractional part if (b != 's' && fractionScale >= 0) { return null @@ -594,26 +594,26 @@ object IntervalUtils { } catch { case _: ArithmeticException => return null } - state = UNIT_NAME_SUFFIX - case UNIT_NAME_SUFFIX => + state = UNIT_SUFFIX + case UNIT_SUFFIX => b match { - case 's' => state = END_UNIT_NAME - case ' ' => state = BEGIN_VALUE + case 's' => state = UNIT_END + case ' ' => state = NEXT_VALUE_UNIT case _ => return null } i += 1 - case END_UNIT_NAME => + case UNIT_END => b match { case ' ' => i += 1 - state = BEGIN_VALUE + state = NEXT_VALUE_UNIT case _ => return null } } } val result = state match { - case UNIT_NAME_SUFFIX | END_UNIT_NAME | BEGIN_VALUE => + case UNIT_SUFFIX | UNIT_END | NEXT_VALUE_UNIT => new CalendarInterval(months, days, microseconds) case _ => null } From 34bf71910830e1efabe4af635b2f0ef6692f57d1 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 15:45:42 +0800 Subject: [PATCH 11/12] naming again --- .../spark/sql/catalyst/util/IntervalUtils.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 39f5b1fb34e2b..7e903274ef713 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -428,7 +428,7 @@ object IntervalUtils { type ParseState = Value val PREFIX, - NEXT_VALUE_UNIT, + TRIM_BEFORE_SIGN, SIGN, TRIM_BEFORE_VALUE, VALUE, @@ -493,8 +493,8 @@ object IntervalUtils { i += intervalStr.numBytes() } } - state = NEXT_VALUE_UNIT - case NEXT_VALUE_UNIT => trimToNextState(b, SIGN) + state = TRIM_BEFORE_SIGN + case TRIM_BEFORE_SIGN => trimToNextState(b, SIGN) case SIGN => b match { case '-' => @@ -598,7 +598,7 @@ object IntervalUtils { case UNIT_SUFFIX => b match { case 's' => state = UNIT_END - case ' ' => state = NEXT_VALUE_UNIT + case ' ' => state = TRIM_BEFORE_SIGN case _ => return null } i += 1 @@ -606,14 +606,14 @@ object IntervalUtils { b match { case ' ' => i += 1 - state = NEXT_VALUE_UNIT + state = TRIM_BEFORE_SIGN case _ => return null } } } val result = state match { - case UNIT_SUFFIX | UNIT_END | NEXT_VALUE_UNIT => + case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN => new CalendarInterval(months, days, microseconds) case _ => null } From cb83761fc5872a574b1734b5568a14a4bcf5e72c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 11 Nov 2019 16:23:03 +0800 Subject: [PATCH 12/12] tests --- .../resources/sql-tests/inputs/interval.sql | 14 +++++----- .../sql-tests/results/interval.sql.out | 28 +++++++++---------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index b91d63b181fec..148d84942a682 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -67,10 +67,10 @@ select make_interval(1, 2, 3, 4, 5, 6); select make_interval(1, 2, 3, 4, 5, 6, 7.008009); -- cast string to intervals -select cast(v as interval) from values ('1 second') t(v); -select cast(v as interval) from values ('+1 second') t(v); -select cast(v as interval) from values ('-1 second') t(v); -select cast(v as interval) from values ('+ 1 second') t(v); -select cast(v as interval) from values ('- 1 second') t(v); -select cast(v as interval) from values ('- -1 second') t(v); -select cast(v as interval) from values ('- +1 second') t(v); +select cast('1 second' as interval); +select cast('+1 second' as interval); +select cast('-1 second' as interval); +select cast('+ 1 second' as interval); +select cast('- 1 second' as interval); +select cast('- -1 second' as interval); +select cast('- +1 second' as interval); diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 3dc453ccb4c18..1bbeeb2085e43 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -309,56 +309,56 @@ struct -- !query 38 -select cast(v as interval) from values ('1 second') t(v) +select cast('1 second' as interval) -- !query 38 schema -struct +struct -- !query 38 output 1 seconds -- !query 39 -select cast(v as interval) from values ('+1 second') t(v) +select cast('+1 second' as interval) -- !query 39 schema -struct +struct -- !query 39 output 1 seconds -- !query 40 -select cast(v as interval) from values ('-1 second') t(v) +select cast('-1 second' as interval) -- !query 40 schema -struct +struct -- !query 40 output -1 seconds -- !query 41 -select cast(v as interval) from values ('+ 1 second') t(v) +select cast('+ 1 second' as interval) -- !query 41 schema -struct +struct -- !query 41 output 1 seconds -- !query 42 -select cast(v as interval) from values ('- 1 second') t(v) +select cast('- 1 second' as interval) -- !query 42 schema -struct +struct -- !query 42 output -1 seconds -- !query 43 -select cast(v as interval) from values ('- -1 second') t(v) +select cast('- -1 second' as interval) -- !query 43 schema -struct +struct -- !query 43 output NULL -- !query 44 -select cast(v as interval) from values ('- +1 second') t(v) +select cast('- +1 second' as interval) -- !query 44 schema -struct +struct -- !query 44 output NULL