From d7bb327aeee59350e7aee5af19967cec718119b8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 11 Mar 2021 15:21:15 +0000 Subject: [PATCH] [SPARK-34695][SQL] Fix long overflow in conversion of minimum duration to microseconds ### What changes were proposed in this pull request? In the PR, I propose to specially handle the amount of seconds `-9223372036855` in `IntervalUtils.durationToMicros()`. Starting from the amount (any durations with the second field < `-9223372036855`), input durations cannot fit into `Long` in the conversion to microseconds. For example, the amount of microseconds = `Long.MinValue = -9223372036854775808` can be represented in two forms: 1. seconds = -9223372036854, nanoAdjustment = -775808, or 2. seconds = -9223372036855, nanoAdjustment = +224192 And the method `Duration.ofSeconds()` produces the latter form, but such a form causes overflow while converting `-9223372036855` seconds to microseconds. In the PR, I propose to convert the second form to the first one if the second field of the input duration is equal to `-9223372036855`. ### Why are the changes needed? The changes fix the issue demonstrated by the code: ```scala scala> durationToMicros(microsToDuration(Long.MinValue)) java.lang.ArithmeticException: long overflow at java.lang.Math.multiplyExact(Math.java:892) at org.apache.spark.sql.catalyst.util.IntervalUtils$.durationToMicros(IntervalUtils.scala:782) ... 49 elided ``` The `durationToMicros()` method cannot handle valid output of `microsToDuration()`. ### Does this PR introduce _any_ user-facing change? Should not, since the new interval types have not been released yet. ### How was this patch tested? By running new UT from `IntervalUtilsSuite`. Closes #31799 from MaxGekk/fix-min-duration. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/IntervalUtils.scala | 18 +++++++++++++++--- .../sql/catalyst/util/IntervalUtilsSuite.scala | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 06ab4b603f028..58a52475b624f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -765,6 +765,9 @@ object IntervalUtils { new CalendarInterval(totalMonths, totalDays, micros) } + // The amount of seconds that can cause overflow in the conversion to microseconds + private final val minDurationSeconds = Math.floorDiv(Long.MinValue, MICROS_PER_SECOND) + /** * Converts this duration to the total length in microseconds. *

@@ -779,9 +782,18 @@ object IntervalUtils { * @throws ArithmeticException If numeric overflow occurs */ def durationToMicros(duration: Duration): Long = { - val us = Math.multiplyExact(duration.getSeconds, MICROS_PER_SECOND) - val result = Math.addExact(us, duration.getNano / NANOS_PER_MICROS) - result + val seconds = duration.getSeconds + if (seconds == minDurationSeconds) { + val microsInSeconds = (minDurationSeconds + 1) * MICROS_PER_SECOND + val nanoAdjustment = duration.getNano + assert(0 <= nanoAdjustment && nanoAdjustment < NANOS_PER_SECOND, + "Duration.getNano() must return the adjustment to the seconds field " + + "in the range from 0 to 999999999 nanoseconds, inclusive.") + Math.addExact(microsInSeconds, (nanoAdjustment - NANOS_PER_SECOND) / NANOS_PER_MICROS) + } else { + val microsInSeconds = Math.multiplyExact(seconds, MICROS_PER_SECOND) + Math.addExact(microsInSeconds, duration.getNano / NANOS_PER_MICROS) + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala index df2656fb7aa87..607337383a78a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/IntervalUtilsSuite.scala @@ -425,4 +425,20 @@ class IntervalUtilsSuite extends SparkFunSuite with SQLHelper { assert(monthsToPeriod(Int.MaxValue) === Period.ofYears(178956970).withMonths(7)) assert(monthsToPeriod(Int.MinValue) === Period.ofYears(-178956970).withMonths(-8)) } + + test("SPARK-34695: round trip conversion of micros -> duration -> micros") { + Seq( + 0, + MICROS_PER_SECOND - 1, + -MICROS_PER_SECOND + 1, + MICROS_PER_SECOND, + -MICROS_PER_SECOND, + Long.MaxValue - MICROS_PER_SECOND, + Long.MinValue + MICROS_PER_SECOND, + Long.MaxValue, + Long.MinValue).foreach { micros => + val duration = microsToDuration(micros) + 
assert(durationToMicros(duration) === micros) + } + } }