-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-29822][SQL] Fix cast error when there are white spaces between signs and values #26449
Changes from 10 commits
b1b2f7c
71acf0a
8f0dba0
925e8e7
65e2a1c
200fd5d
8851f35
48d168d
1e8272c
cb8aa6b
0a301a1
83b9ae3
34bf719
cb83761
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -425,11 +425,15 @@ object IntervalUtils { | |
} | ||
|
||
private object ParseState extends Enumeration { | ||
type ParseState = Value | ||
|
||
val PREFIX, | ||
BEGIN_VALUE, | ||
PARSE_SIGN, | ||
TRIM_VALUE, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we make the name clearer? e.g. |
||
PARSE_UNIT_VALUE, | ||
FRACTIONAL_PART, | ||
TRIM_UNIT, | ||
BEGIN_UNIT_NAME, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can add a |
||
UNIT_NAME_SUFFIX, | ||
END_UNIT_NAME = Value | ||
|
@@ -458,7 +462,7 @@ object IntervalUtils { | |
val s = input.trim.toLowerCase | ||
// scalastyle:on | ||
val bytes = s.getBytes | ||
if (bytes.length == 0) { | ||
if (bytes.isEmpty) { | ||
return null | ||
} | ||
var state = PREFIX | ||
|
@@ -471,6 +475,13 @@ object IntervalUtils { | |
var fractionScale: Int = 0 | ||
var fraction: Int = 0 | ||
|
||
def trimToNextState(b: Byte, next: ParseState): Unit = { | ||
b match { | ||
case ' ' => i += 1 | ||
case _ => state = next | ||
} | ||
} | ||
|
||
while (i < bytes.length) { | ||
val b = bytes(i) | ||
state match { | ||
|
@@ -483,11 +494,7 @@ object IntervalUtils { | |
} | ||
} | ||
state = BEGIN_VALUE | ||
case BEGIN_VALUE => | ||
b match { | ||
case ' ' => i += 1 | ||
case _ => state = PARSE_SIGN | ||
} | ||
case BEGIN_VALUE => trimToNextState(b, PARSE_SIGN) | ||
case PARSE_SIGN => | ||
b match { | ||
case '-' => | ||
|
@@ -505,7 +512,8 @@ object IntervalUtils { | |
// Sets the scale to an invalid value to track fraction presence | ||
// in the BEGIN_UNIT_NAME state | ||
fractionScale = -1 | ||
state = PARSE_UNIT_VALUE | ||
state = TRIM_VALUE | ||
case TRIM_VALUE => trimToNextState(b, PARSE_UNIT_VALUE) | ||
case PARSE_UNIT_VALUE => | ||
b match { | ||
case _ if '0' <= b && b <= '9' => | ||
|
@@ -514,8 +522,7 @@ object IntervalUtils { | |
} catch { | ||
case _: ArithmeticException => return null | ||
} | ||
case ' ' => | ||
state = BEGIN_UNIT_NAME | ||
case ' ' => state = TRIM_UNIT | ||
case '.' => | ||
fractionScale = (NANOS_PER_SECOND / 10).toInt | ||
state = FRACTIONAL_PART | ||
|
@@ -529,68 +536,65 @@ object IntervalUtils { | |
fractionScale /= 10 | ||
case ' ' => | ||
fraction /= NANOS_PER_MICROS.toInt | ||
state = BEGIN_UNIT_NAME | ||
state = TRIM_UNIT | ||
case _ => return null | ||
} | ||
i += 1 | ||
case TRIM_UNIT => trimToNextState(b, BEGIN_UNIT_NAME) | ||
case BEGIN_UNIT_NAME => | ||
if (b == ' ') { | ||
i += 1 | ||
} else { | ||
// Checks that only seconds can have the fractional part | ||
if (b != 's' && fractionScale >= 0) { | ||
return null | ||
} | ||
if (isNegative) { | ||
currentValue = -currentValue | ||
fraction = -fraction | ||
} | ||
try { | ||
b match { | ||
case 'y' if s.matchAt(yearStr, i) => | ||
val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) | ||
months = Math.toIntExact(Math.addExact(months, monthsInYears)) | ||
i += yearStr.numBytes() | ||
case 'w' if s.matchAt(weekStr, i) => | ||
val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) | ||
days = Math.toIntExact(Math.addExact(days, daysInWeeks)) | ||
i += weekStr.numBytes() | ||
case 'd' if s.matchAt(dayStr, i) => | ||
days = Math.addExact(days, Math.toIntExact(currentValue)) | ||
i += dayStr.numBytes() | ||
case 'h' if s.matchAt(hourStr, i) => | ||
val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) | ||
microseconds = Math.addExact(microseconds, hoursUs) | ||
i += hourStr.numBytes() | ||
case 's' if s.matchAt(secondStr, i) => | ||
val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) | ||
microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) | ||
i += secondStr.numBytes() | ||
case 'm' => | ||
if (s.matchAt(monthStr, i)) { | ||
months = Math.addExact(months, Math.toIntExact(currentValue)) | ||
i += monthStr.numBytes() | ||
} else if (s.matchAt(minuteStr, i)) { | ||
val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) | ||
microseconds = Math.addExact(microseconds, minutesUs) | ||
i += minuteStr.numBytes() | ||
} else if (s.matchAt(millisStr, i)) { | ||
val millisUs = Math.multiplyExact( | ||
currentValue, | ||
MICROS_PER_MILLIS) | ||
microseconds = Math.addExact(microseconds, millisUs) | ||
i += millisStr.numBytes() | ||
} else if (s.matchAt(microsStr, i)) { | ||
microseconds = Math.addExact(microseconds, currentValue) | ||
i += microsStr.numBytes() | ||
} else return null | ||
case _ => return null | ||
} | ||
} catch { | ||
case _: ArithmeticException => return null | ||
// Checks that only seconds can have the fractional part | ||
if (b != 's' && fractionScale >= 0) { | ||
return null | ||
} | ||
if (isNegative) { | ||
currentValue = -currentValue | ||
fraction = -fraction | ||
} | ||
try { | ||
b match { | ||
case 'y' if s.matchAt(yearStr, i) => | ||
val monthsInYears = Math.multiplyExact(MONTHS_PER_YEAR, currentValue) | ||
months = Math.toIntExact(Math.addExact(months, monthsInYears)) | ||
i += yearStr.numBytes() | ||
case 'w' if s.matchAt(weekStr, i) => | ||
val daysInWeeks = Math.multiplyExact(DAYS_PER_WEEK, currentValue) | ||
days = Math.toIntExact(Math.addExact(days, daysInWeeks)) | ||
i += weekStr.numBytes() | ||
case 'd' if s.matchAt(dayStr, i) => | ||
days = Math.addExact(days, Math.toIntExact(currentValue)) | ||
i += dayStr.numBytes() | ||
case 'h' if s.matchAt(hourStr, i) => | ||
val hoursUs = Math.multiplyExact(currentValue, MICROS_PER_HOUR) | ||
microseconds = Math.addExact(microseconds, hoursUs) | ||
i += hourStr.numBytes() | ||
case 's' if s.matchAt(secondStr, i) => | ||
val secondsUs = Math.multiplyExact(currentValue, MICROS_PER_SECOND) | ||
microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) | ||
i += secondStr.numBytes() | ||
case 'm' => | ||
if (s.matchAt(monthStr, i)) { | ||
months = Math.addExact(months, Math.toIntExact(currentValue)) | ||
i += monthStr.numBytes() | ||
} else if (s.matchAt(minuteStr, i)) { | ||
val minutesUs = Math.multiplyExact(currentValue, MICROS_PER_MINUTE) | ||
microseconds = Math.addExact(microseconds, minutesUs) | ||
i += minuteStr.numBytes() | ||
} else if (s.matchAt(millisStr, i)) { | ||
val millisUs = Math.multiplyExact( | ||
currentValue, | ||
MICROS_PER_MILLIS) | ||
microseconds = Math.addExact(microseconds, millisUs) | ||
i += millisStr.numBytes() | ||
} else if (s.matchAt(microsStr, i)) { | ||
microseconds = Math.addExact(microseconds, currentValue) | ||
i += microsStr.numBytes() | ||
} else return null | ||
case _ => return null | ||
} | ||
state = UNIT_NAME_SUFFIX | ||
} catch { | ||
case _: ArithmeticException => return null | ||
} | ||
state = UNIT_NAME_SUFFIX | ||
case UNIT_NAME_SUFFIX => | ||
b match { | ||
case 's' => state = END_UNIT_NAME | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,29 @@ | ||
OpenJDK 64-Bit Server VM 11.0.2+9 on Mac OS X 10.15.1 | ||
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz | ||
Java HotSpot(TM) 64-Bit Server VM 11.0.5+10-LTS on Mac OS X 10.14.6 | ||
Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz | ||
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative | ||
------------------------------------------------------------------------------------------------------------------------ | ||
prepare string w/ interval 442 472 41 2.3 442.4 1.0X | ||
prepare string w/o interval 420 423 6 2.4 419.6 1.1X | ||
1 units w/ interval 350 359 9 2.9 349.8 1.3X | ||
1 units w/o interval 316 317 1 3.2 316.4 1.4X | ||
2 units w/ interval 457 459 2 2.2 457.0 1.0X | ||
2 units w/o interval 432 435 3 2.3 432.2 1.0X | ||
3 units w/ interval 610 613 3 1.6 609.8 0.7X | ||
3 units w/o interval 581 583 2 1.7 580.5 0.8X | ||
4 units w/ interval 720 724 4 1.4 720.4 0.6X | ||
4 units w/o interval 699 704 8 1.4 699.4 0.6X | ||
5 units w/ interval 850 850 0 1.2 849.9 0.5X | ||
5 units w/o interval 829 832 5 1.2 828.7 0.5X | ||
6 units w/ interval 927 932 4 1.1 927.1 0.5X | ||
6 units w/o interval 891 892 1 1.1 890.5 0.5X | ||
7 units w/ interval 1033 1040 8 1.0 1033.2 0.4X | ||
7 units w/o interval 1020 1024 5 1.0 1020.2 0.4X | ||
8 units w/ interval 1168 1169 2 0.9 1168.0 0.4X | ||
8 units w/o interval 1155 1157 2 0.9 1154.5 0.4X | ||
9 units w/ interval 1326 1328 3 0.8 1326.1 0.3X | ||
9 units w/o interval 1372 1381 14 0.7 1372.5 0.3X | ||
prepare string w/ interval 574 610 45 1.7 573.9 1.0X | ||
prepare string w/o interval 518 538 27 1.9 517.7 1.1X | ||
1 units w/ interval 425 439 16 2.4 425.3 1.3X | ||
1 units w/o interval 385 393 10 2.6 385.2 1.5X | ||
2 units w/ interval 553 561 11 1.8 553.1 1.0X | ||
2 units w/o interval 531 543 11 1.9 531.0 1.1X | ||
3 units w/ interval 1134 1159 32 0.9 1134.0 0.5X | ||
3 units w/o interval 1121 1126 6 0.9 1121.3 0.5X | ||
4 units w/ interval 1226 1250 21 0.8 1226.1 0.5X | ||
4 units w/o interval 1227 1239 11 0.8 1227.1 0.5X | ||
5 units w/ interval 1375 1447 93 0.7 1374.7 0.4X | ||
5 units w/o interval 1335 1346 19 0.7 1335.1 0.4X | ||
6 units w/ interval 1530 1556 24 0.7 1529.5 0.4X | ||
6 units w/o interval 1481 1492 17 0.7 1480.7 0.4X | ||
7 units w/ interval 1730 1745 14 0.6 1729.9 0.3X | ||
7 units w/o interval 1788 1859 112 0.6 1788.1 0.3X | ||
8 units w/ interval 1952 2087 117 0.5 1951.7 0.3X | ||
8 units w/o interval 2083 2207 209 0.5 2082.5 0.3X | ||
9 units w/ interval 2228 2291 60 0.4 2227.5 0.3X | ||
9 units w/o interval 2130 2184 75 0.5 2130.1 0.3X | ||
10 units w/ interval 2414 2502 81 0.4 2413.8 0.2X | ||
10 units w/o interval 2463 2488 35 0.4 2463.1 0.2X | ||
11 units w/ interval 2717 2755 42 0.4 2716.8 0.2X | ||
11 units w/o interval 2578 2661 77 0.4 2577.7 0.2X | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,29 @@ | ||
Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.15.1 | ||
Intel(R) Core(TM) i7-4850HQ CPU @ 2.30GHz | ||
Java HotSpot(TM) 64-Bit Server VM 1.8.0_231-b11 on Mac OS X 10.14.6 | ||
Intel(R) Core(TM) i5-5287U CPU @ 2.90GHz | ||
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative | ||
------------------------------------------------------------------------------------------------------------------------ | ||
prepare string w/ interval 422 437 16 2.4 421.8 1.0X | ||
prepare string w/o interval 369 374 8 2.7 369.4 1.1X | ||
1 units w/ interval 426 430 5 2.3 425.5 1.0X | ||
1 units w/o interval 382 386 5 2.6 382.1 1.1X | ||
2 units w/ interval 519 527 9 1.9 518.5 0.8X | ||
2 units w/o interval 505 512 6 2.0 505.4 0.8X | ||
3 units w/ interval 650 653 3 1.5 649.6 0.6X | ||
3 units w/o interval 630 633 4 1.6 629.7 0.7X | ||
4 units w/ interval 755 761 6 1.3 754.9 0.6X | ||
4 units w/o interval 745 749 3 1.3 745.3 0.6X | ||
5 units w/ interval 882 891 14 1.1 882.0 0.5X | ||
5 units w/o interval 867 870 3 1.2 867.4 0.5X | ||
6 units w/ interval 1008 1013 4 1.0 1008.2 0.4X | ||
6 units w/o interval 990 995 5 1.0 990.4 0.4X | ||
7 units w/ interval 1057 1063 6 0.9 1056.9 0.4X | ||
7 units w/o interval 1042 1046 4 1.0 1042.3 0.4X | ||
8 units w/ interval 1206 1208 2 0.8 1206.0 0.3X | ||
8 units w/o interval 1194 1198 4 0.8 1194.1 0.4X | ||
9 units w/ interval 1322 1324 3 0.8 1321.5 0.3X | ||
9 units w/o interval 1314 1318 4 0.8 1313.6 0.3X | ||
prepare string w/ interval 531 566 34 1.9 530.5 1.0X | ||
prepare string w/o interval 466 479 21 2.1 466.5 1.1X | ||
1 units w/ interval 475 521 63 2.1 475.0 1.1X | ||
1 units w/o interval 440 457 25 2.3 440.1 1.2X | ||
2 units w/ interval 614 621 11 1.6 613.7 0.9X | ||
2 units w/o interval 596 605 8 1.7 596.5 0.9X | ||
3 units w/ interval 1115 1120 4 0.9 1115.0 0.5X | ||
3 units w/o interval 1100 1107 6 0.9 1100.2 0.5X | ||
4 units w/ interval 1255 1263 9 0.8 1255.1 0.4X | ||
4 units w/o interval 1254 1393 130 0.8 1253.8 0.4X | ||
5 units w/ interval 1367 1373 5 0.7 1367.2 0.4X | ||
5 units w/o interval 1366 1376 9 0.7 1366.2 0.4X | ||
6 units w/ interval 1526 1530 6 0.7 1526.0 0.3X | ||
6 units w/o interval 1504 1510 7 0.7 1504.0 0.4X | ||
7 units w/ interval 1748 1778 27 0.6 1748.0 0.3X | ||
7 units w/o interval 1740 1744 5 0.6 1740.0 0.3X | ||
8 units w/ interval 2092 2107 14 0.5 2092.5 0.3X | ||
8 units w/o interval 2094 2098 5 0.5 2094.4 0.3X | ||
9 units w/ interval 1874 1880 5 0.5 1873.9 0.3X | ||
9 units w/o interval 1867 1872 4 0.5 1867.3 0.3X | ||
10 units w/ interval 2127 2134 13 0.5 2126.5 0.2X | ||
10 units w/o interval 2045 2049 6 0.5 2045.0 0.3X | ||
11 units w/ interval 2242 2254 13 0.4 2241.9 0.2X | ||
11 units w/o interval 2221 2227 6 0.5 2221.1 0.2X | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we can rename it to
TRIM_BEFORE_PARSE_VALUE
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so we may change
PARSE_UNIT_VALUE
toPARSE_VALUE
tooHow about renaming them all as,
Seems loud and clear enough for each state
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what does
BODY
mean? others LGTMThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
interval (PREFIX, BODY)
BODY (SIGN, VALUE , UNIT)+
Seems not persuadable enough
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or
NEXT_VALUE_UNIT
?NEXT_VALUE_UNIT_PAIR