From dee27ee04559c75bb996161bc1e1be829354fe06 Mon Sep 17 00:00:00 2001 From: TJX2014 Date: Fri, 19 Jun 2020 19:24:34 -0700 Subject: [PATCH] [SPARK-31980][SQL] Function sequence() fails if start and end of range are equal dates ### What changes were proposed in this pull request? 1. Add judge equal as bigger condition in `org.apache.spark.sql.catalyst.expressions.Sequence.TemporalSequenceImpl#eval` 2. Unit test for interval `day`, `month`, `year` ### Why are the changes needed? Bug exists when sequence input get same equal start and end dates, which will occur `while loop` forever ### Does this PR introduce _any_ user-facing change? Yes, Before this PR, people will get a `java.lang.ArrayIndexOutOfBoundsException`, when eval as below: `sql("select sequence(cast('2011-03-01' as date), cast('2011-03-01' as date), interval 1 year)").show(false) ` ### How was this patch tested? Unit test. Closes #28819 from TJX2014/master-SPARK-31980. Authored-by: TJX2014 Signed-off-by: Dongjoon Hyun (cherry picked from commit 177a380bcf1f56982760442e8418d28415bef8b0) Signed-off-by: Dongjoon Hyun --- .../expressions/collectionOperations.scala | 4 ++-- .../CollectionExpressionsSuite.scala | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 785369673aa63..6d74f4569bca3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2671,7 +2671,7 @@ object Sequence { val maxEstimatedArrayLength = getSequenceLength(startMicros, stopMicros, intervalStepInMicros) - val stepSign = if (stopMicros > startMicros) +1 else -1 + val stepSign = if (stopMicros >= startMicros) +1 else -1 val exclusiveItem = stopMicros + stepSign val arr = new Array[T](maxEstimatedArrayLength) var t = startMicros @@ -2734,7 +2734,7 @@ object Sequence { | | $sequenceLengthCode | - | final int $stepSign = $stopMicros > $startMicros ? +1 : -1; + | final int $stepSign = $stopMicros >= $startMicros ? +1 : -1; | final long $exclusiveItem = $stopMicros + $stepSign; | | $arr = new $elemType[$arrLength]; diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 1984fa1d4b9d1..dbe252fcd5526 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1743,4 +1743,22 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper assert(ArrayIntersect(a20, a21).dataType.asInstanceOf[ArrayType].containsNull === false) assert(ArrayIntersect(a23, a24).dataType.asInstanceOf[ArrayType].containsNull === true) } + + test("SPARK-31980: Start and end equal in month range") { + checkEvaluation(new Sequence( + Literal(Date.valueOf("2018-01-01")), + Literal(Date.valueOf("2018-01-01")), + Literal(stringToInterval("interval 1 day"))), + Seq(Date.valueOf("2018-01-01"))) + checkEvaluation(new Sequence( + Literal(Date.valueOf("2018-01-01")), + Literal(Date.valueOf("2018-01-01")), + Literal(stringToInterval("interval 1 month"))), + Seq(Date.valueOf("2018-01-01"))) + checkEvaluation(new Sequence( + Literal(Date.valueOf("2018-01-01")), + Literal(Date.valueOf("2018-01-01")), + Literal(stringToInterval("interval 1 year"))), + Seq(Date.valueOf("2018-01-01"))) + } }