diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 19bb44f1e48a9..7e903274ef713 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -425,14 +425,18 @@ object IntervalUtils { } private object ParseState extends Enumeration { + type ParseState = Value + val PREFIX, - BEGIN_VALUE, - PARSE_SIGN, - PARSE_UNIT_VALUE, - FRACTIONAL_PART, - BEGIN_UNIT_NAME, - UNIT_NAME_SUFFIX, - END_UNIT_NAME = Value + TRIM_BEFORE_SIGN, + SIGN, + TRIM_BEFORE_VALUE, + VALUE, + VALUE_FRACTIONAL_PART, + TRIM_BEFORE_UNIT, + UNIT_BEGIN, + UNIT_SUFFIX, + UNIT_END = Value } private final val intervalStr = UTF8String.fromString("interval ") private def unitToUtf8(unit: IntervalUnit): UTF8String = { @@ -458,7 +462,7 @@ object IntervalUtils { val s = input.trim.toLowerCase // scalastyle:on val bytes = s.getBytes - if (bytes.length == 0) { + if (bytes.isEmpty) { return null } var state = PREFIX @@ -471,6 +475,13 @@ object IntervalUtils { var fractionScale: Int = 0 var fraction: Int = 0 + def trimToNextState(b: Byte, next: ParseState): Unit = { + b match { + case ' ' => i += 1 + case _ => state = next + } + } + while (i < bytes.length) { val b = bytes(i) state match { @@ -482,13 +493,9 @@ object IntervalUtils { i += intervalStr.numBytes() } } - state = BEGIN_VALUE - case BEGIN_VALUE => - b match { - case ' ' => i += 1 - case _ => state = PARSE_SIGN - } - case PARSE_SIGN => + state = TRIM_BEFORE_SIGN + case TRIM_BEFORE_SIGN => trimToNextState(b, SIGN) + case SIGN => b match { case '-' => isNegative = true @@ -505,8 +512,9 @@ object IntervalUtils { // Sets the scale to an invalid value to track fraction presence - // in the BEGIN_UNIT_NAME state + // in the UNIT_BEGIN state fractionScale = -1 - state = PARSE_UNIT_VALUE - case PARSE_UNIT_VALUE => + state = 
TRIM_BEFORE_VALUE + case TRIM_BEFORE_VALUE => trimToNextState(b, VALUE) + case VALUE => b match { case _ if '0' <= b && b <= '9' => try { @@ -514,102 +522,98 @@ object IntervalUtils { } catch { case _: ArithmeticException => return null } - case ' ' => - state = BEGIN_UNIT_NAME + case ' ' => state = TRIM_BEFORE_UNIT case '.' => fractionScale = (NANOS_PER_SECOND / 10).toInt - state = FRACTIONAL_PART + state = VALUE_FRACTIONAL_PART case _ => return null } i += 1 - case FRACTIONAL_PART => + case VALUE_FRACTIONAL_PART => b match { case _ if '0' <= b && b <= '9' && fractionScale > 0 => fraction += (b - '0') * fractionScale fractionScale /= 10 case ' ' => fraction /= NANOS_PER_MICROS.toInt - state = BEGIN_UNIT_NAME + state = TRIM_BEFORE_UNIT case _ => return null } i += 1 - case BEGIN_UNIT_NAME => - if (b == ' ') { - i += 1 - } else { - // Checks that only seconds can have the fractional part - if (b != 's' && fractionScale >= 0) { - return null - } - if (isNegative) { - currentValue = -currentValue - fraction = -fraction - } - try { - b match { - case 'y' if s.matchAt(yearStr, i) => - val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) - months = Math.toIntExact(Math.addExact(months, monthsInYears)) - i += yearStr.numBytes() - case 'w' if s.matchAt(weekStr, i) => - val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) - days = Math.toIntExact(Math.addExact(days, daysInWeeks)) - i += weekStr.numBytes() - case 'd' if s.matchAt(dayStr, i) => - days = Math.addExact(days, Math.toIntExact(currentValue)) - i += dayStr.numBytes() - case 'h' if s.matchAt(hourStr, i) => - val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) - microseconds = Math.addExact(microseconds, hoursUs) - i += hourStr.numBytes() - case 's' if s.matchAt(secondStr, i) => - val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) - microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) - i += secondStr.numBytes() - case 'm' => - if 
(s.matchAt(monthStr, i)) { - months = Math.addExact(months, Math.toIntExact(currentValue)) - i += monthStr.numBytes() - } else if (s.matchAt(minuteStr, i)) { - val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) - microseconds = Math.addExact(microseconds, minutesUs) - i += minuteStr.numBytes() - } else if (s.matchAt(millisStr, i)) { - val millisUs = Math.multiplyExact( - currentValue, - MICROS_PER_MILLIS) - microseconds = Math.addExact(microseconds, millisUs) - i += millisStr.numBytes() - } else if (s.matchAt(microsStr, i)) { - microseconds = Math.addExact(microseconds, currentValue) - i += microsStr.numBytes() - } else return null - case _ => return null - } - } catch { - case _: ArithmeticException => return null + case TRIM_BEFORE_UNIT => trimToNextState(b, UNIT_BEGIN) + case UNIT_BEGIN => + // Checks that only seconds can have the fractional part + if (b != 's' && fractionScale >= 0) { + return null + } + if (isNegative) { + currentValue = -currentValue + fraction = -fraction + } + try { + b match { + case 'y' if s.matchAt(yearStr, i) => + val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) + months = Math.toIntExact(Math.addExact(months, monthsInYears)) + i += yearStr.numBytes() + case 'w' if s.matchAt(weekStr, i) => + val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) + days = Math.toIntExact(Math.addExact(days, daysInWeeks)) + i += weekStr.numBytes() + case 'd' if s.matchAt(dayStr, i) => + days = Math.addExact(days, Math.toIntExact(currentValue)) + i += dayStr.numBytes() + case 'h' if s.matchAt(hourStr, i) => + val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) + microseconds = Math.addExact(microseconds, hoursUs) + i += hourStr.numBytes() + case 's' if s.matchAt(secondStr, i) => + val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) + microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) + i += secondStr.numBytes() + case 'm' => + if (s.matchAt(monthStr, 
i)) { + months = Math.addExact(months, Math.toIntExact(currentValue)) + i += monthStr.numBytes() + } else if (s.matchAt(minuteStr, i)) { + val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) + microseconds = Math.addExact(microseconds, minutesUs) + i += minuteStr.numBytes() + } else if (s.matchAt(millisStr, i)) { + val millisUs = Math.multiplyExact( + currentValue, + MICROS_PER_MILLIS) + microseconds = Math.addExact(microseconds, millisUs) + i += millisStr.numBytes() + } else if (s.matchAt(microsStr, i)) { + microseconds = Math.addExact(microseconds, currentValue) + i += microsStr.numBytes() + } else return null + case _ => return null } - state = UNIT_NAME_SUFFIX + } catch { + case _: ArithmeticException => return null } - case UNIT_NAME_SUFFIX => + state = UNIT_SUFFIX + case UNIT_SUFFIX => b match { - case 's' => state = END_UNIT_NAME - case ' ' => state = BEGIN_VALUE + case 's' => state = UNIT_END + case ' ' => state = TRIM_BEFORE_SIGN case _ => return null } i += 1 - case END_UNIT_NAME => + case UNIT_END => b match { case ' ' => i += 1 - state = BEGIN_VALUE + state = TRIM_BEFORE_SIGN case _ => return null } } } val result = state match { - case UNIT_NAME_SUFFIX | END_UNIT_NAME | BEGIN_VALUE => + case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN => new CalendarInterval(months, days, microseconds) case _ => null } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index 75b0afceca144..0a16dbfdebde7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -79,6 +79,7 @@ class IntervalUtilsSuite extends SparkFunSuite { "-1 MONTH 1 day -1 microseconds" -> new CalendarInterval(-1, 1, -1), " 123 MONTHS 123 DAYS 123 Microsecond " -> new CalendarInterval(123, 123, 123), "interval 
-1 day +3 Microseconds" -> new CalendarInterval(0, -1, 3), + "interval - 1 day + 3 Microseconds" -> new CalendarInterval(0, -1, 3), " interval 8 years -11 months 123 weeks -1 day " + "23 hours -22 minutes 1 second -123 millisecond 567 microseconds " -> new CalendarInterval(85, 860, 81480877567L)).foreach { case (input, expected) => diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt index 31fb7080260ef..07dd8d5e44ea7 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk11-results.txt @@ -1,25 +1,29 @@ -OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 442 472 41 2.3 442.4 1.0X -prepare string w/o interval 420 423 6 2.4 419.6 1.1X -1 units w/ interval 350 359 9 2.9 349.8 1.3X -1 units w/o interval 316 317 1 3.2 316.4 1.4X -2 units w/ interval 457 459 2 2.2 457.0 1.0X -2 units w/o interval 432 435 3 2.3 432.2 1.0X -3 units w/ interval 610 613 3 1.6 609.8 0.7X -3 units w/o interval 581 583 2 1.7 580.5 0.8X -4 units w/ interval 720 724 4 1.4 720.4 0.6X -4 units w/o interval 699 704 8 1.4 699.4 0.6X -5 units w/ interval 850 850 0 1.2 849.9 0.5X -5 units w/o interval 829 832 5 1.2 828.7 0.5X -6 units w/ interval 927 932 4 1.1 927.1 0.5X -6 units w/o interval 891 892 1 1.1 890.5 0.5X -7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X -7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X -8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X -8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X -9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X -9 units w/o 
interval 1372 1381 14 0.7 1372.5 0.3X +prepare string w/ interval 574 610 45 1.7 573.9 1.0X +prepare string w/o interval 518 538 27 1.9 517.7 1.1X +1 units w/ interval 425 439 16 2.4 425.3 1.3X +1 units w/o interval 385 393 10 2.6 385.2 1.5X +2 units w/ interval 553 561 11 1.8 553.1 1.0X +2 units w/o interval 531 543 11 1.9 531.0 1.1X +3 units w/ interval 1134 1159 32 0.9 1134.0 0.5X +3 units w/o interval 1121 1126 6 0.9 1121.3 0.5X +4 units w/ interval 1226 1250 21 0.8 1226.1 0.5X +4 units w/o interval 1227 1239 11 0.8 1227.1 0.5X +5 units w/ interval 1375 1447 93 0.7 1374.7 0.4X +5 units w/o interval 1335 1346 19 0.7 1335.1 0.4X +6 units w/ interval 1530 1556 24 0.7 1529.5 0.4X +6 units w/o interval 1481 1492 17 0.7 1480.7 0.4X +7 units w/ interval 1730 1745 14 0.6 1729.9 0.3X +7 units w/o interval 1788 1859 112 0.6 1788.1 0.3X +8 units w/ interval 1952 2087 117 0.5 1951.7 0.3X +8 units w/o interval 2083 2207 209 0.5 2082.5 0.3X +9 units w/ interval 2228 2291 60 0.4 2227.5 0.3X +9 units w/o interval 2130 2184 75 0.5 2130.1 0.3X +10 units w/ interval 2414 2502 81 0.4 2413.8 0.2X +10 units w/o interval 2463 2488 35 0.4 2463.1 0.2X +11 units w/ interval 2717 2755 42 0.4 2716.8 0.2X +11 units w/o interval 2578 2661 77 0.4 2577.7 0.2X diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index 78cf66447a81d..6ae5a4bd09f6d 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -1,25 +1,29 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.1 -Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz +Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 +Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ 
interval 422 437 16 2.4 421.8 1.0X -prepare string w/o interval 369 374 8 2.7 369.4 1.1X -1 units w/ interval 426 430 5 2.3 425.5 1.0X -1 units w/o interval 382 386 5 2.6 382.1 1.1X -2 units w/ interval 519 527 9 1.9 518.5 0.8X -2 units w/o interval 505 512 6 2.0 505.4 0.8X -3 units w/ interval 650 653 3 1.5 649.6 0.6X -3 units w/o interval 630 633 4 1.6 629.7 0.7X -4 units w/ interval 755 761 6 1.3 754.9 0.6X -4 units w/o interval 745 749 3 1.3 745.3 0.6X -5 units w/ interval 882 891 14 1.1 882.0 0.5X -5 units w/o interval 867 870 3 1.2 867.4 0.5X -6 units w/ interval 1008 1013 4 1.0 1008.2 0.4X -6 units w/o interval 990 995 5 1.0 990.4 0.4X -7 units w/ interval 1057 1063 6 0.9 1056.9 0.4X -7 units w/o interval 1042 1046 4 1.0 1042.3 0.4X -8 units w/ interval 1206 1208 2 0.8 1206.0 0.3X -8 units w/o interval 1194 1198 4 0.8 1194.1 0.4X -9 units w/ interval 1322 1324 3 0.8 1321.5 0.3X -9 units w/o interval 1314 1318 4 0.8 1313.6 0.3X +prepare string w/ interval 531 566 34 1.9 530.5 1.0X +prepare string w/o interval 466 479 21 2.1 466.5 1.1X +1 units w/ interval 475 521 63 2.1 475.0 1.1X +1 units w/o interval 440 457 25 2.3 440.1 1.2X +2 units w/ interval 614 621 11 1.6 613.7 0.9X +2 units w/o interval 596 605 8 1.7 596.5 0.9X +3 units w/ interval 1115 1120 4 0.9 1115.0 0.5X +3 units w/o interval 1100 1107 6 0.9 1100.2 0.5X +4 units w/ interval 1255 1263 9 0.8 1255.1 0.4X +4 units w/o interval 1254 1393 130 0.8 1253.8 0.4X +5 units w/ interval 1367 1373 5 0.7 1367.2 0.4X +5 units w/o interval 1366 1376 9 0.7 1366.2 0.4X +6 units w/ interval 1526 1530 6 0.7 1526.0 0.3X +6 units w/o interval 1504 1510 7 0.7 1504.0 0.4X +7 units w/ interval 1748 1778 27 0.6 1748.0 0.3X +7 units w/o interval 1740 1744 5 0.6 1740.0 0.3X +8 units w/ interval 2092 2107 14 0.5 2092.5 0.3X +8 units w/o interval 2094 2098 5 0.5 2094.4 0.3X +9 units w/ interval 1874 1880 5 0.5 1873.9 0.3X +9 units w/o interval 1867 1872 4 0.5 1867.3 0.3X +10 units w/ interval 2127 2134 13 0.5 2126.5 0.2X +10 
units w/o interval 2045 2049 6 0.5 2045.0 0.3X +11 units w/ interval 2242 2254 13 0.4 2241.9 0.2X +11 units w/o interval 2221 2227 6 0.5 2221.1 0.2X diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 2163a128aacf1..148d84942a682 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -65,3 +65,12 @@ select make_interval(1, 2, 3, 4); select make_interval(1, 2, 3, 4, 5); select make_interval(1, 2, 3, 4, 5, 6); select make_interval(1, 2, 3, 4, 5, 6, 7.008009); + +-- cast string to intervals +select cast('1 second' as interval); +select cast('+1 second' as interval); +select cast('-1 second' as interval); +select cast('+ 1 second' as interval); +select cast('- 1 second' as interval); +select cast('- -1 second' as interval); +select cast('- +1 second' as interval); diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 355a76d56559e..1bbeeb2085e43 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 38 +-- Number of queries: 45 -- !query 0 @@ -306,3 +306,59 @@ select make_interval(1, 2, 3, 4, 5, 6, 7.008009) struct -- !query 37 output 1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query 38 +select cast('1 second' as interval) +-- !query 38 schema +struct +-- !query 38 output +1 seconds + + +-- !query 39 +select cast('+1 second' as interval) +-- !query 39 schema +struct +-- !query 39 output +1 seconds + + +-- !query 40 +select cast('-1 second' as interval) +-- !query 40 schema +struct +-- !query 40 output +-1 seconds + + +-- !query 41 +select cast('+ 1 second' as interval) +-- !query 41 schema +struct +-- !query 41 
output +1 seconds + + +-- !query 42 +select cast('- 1 second' as interval) +-- !query 42 schema +struct +-- !query 42 output +-1 seconds + + +-- !query 43 +select cast('- -1 second' as interval) +-- !query 43 schema +struct +-- !query 43 output +NULL + + +-- !query 44 +select cast('- +1 second' as interval) +-- !query 44 schema +struct +-- !query 44 output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala index d75cb1040f31e..b9bb6f5febd7f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -83,7 +83,8 @@ object IntervalBenchmark extends SqlBasedBenchmark { override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 1000000 val timeUnits = Seq( - "13 months", "100 weeks", "9 days", "12 hours", + "13 months", " 1 months", + "100 weeks", "9 days", "12 hours", "- 3 hours", "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") val intervalToTest = ListBuffer[String]()