[SPARK-36431][SQL] Support TypeCoercion of ANSI intervals with different fields

### What changes were proposed in this pull request?
Support type coercion of ANSI interval types (`YearMonthIntervalType`/`DayTimeIntervalType`) whose operands have different start/end fields, by widening them to a common interval type that covers both field ranges.
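
Conceptually, the common type spans from the smaller start field to the larger end field of the two inputs. A minimal Scala sketch of that rule, assuming Spark's `DayTimeIntervalType` field constants (the `widen` helper is illustrative only, not part of this patch):

```scala
import org.apache.spark.sql.types.{DayTimeIntervalType => DT}

// Illustrative helper mirroring the rule added to findTightestCommonType below:
// the widened type covers the union of both field ranges.
def widen(a: DT, b: DT): DT =
  DT(a.startField.min(b.startField), a.endField.max(b.endField))

widen(DT(DT.DAY, DT.DAY), DT(DT.HOUR, DT.SECOND))         // interval day to second
widen(DT(DT.MINUTE, DT.MINUTE), DT(DT.SECOND, DT.SECOND)) // interval minute to second
```

`YearMonthIntervalType` is widened the same way over its `YEAR` and `MONTH` fields.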

### Why are the changes needed?
Before this change, expressions that mix ANSI intervals with different fields (for example `interval year` with `interval month`) fail to resolve with a data type mismatch error, as the old golden-file results below show.

### Does this PR introduce _any_ user-facing change?
After this PR, users can (see the spark-shell sketch after this list):
 - use comparison functions on different fields of `DayTimeIntervalType`/`YearMonthIntervalType`, such as `INTERVAL '1' YEAR > INTERVAL '11' MONTH`
 - use different fields of ANSI interval types in collection functions, such as `array(INTERVAL '1' YEAR, INTERVAL '11' MONTH)`
 - use different fields of ANSI interval types in `coalesce`, etc.
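
A minimal spark-shell sketch of the new behavior, assuming a build that includes this patch (`spark` is the session provided by the shell):

```scala
// Comparison across different year-month fields: both sides are coerced to a
// common YearMonthIntervalType before the comparison is resolved.
spark.sql("SELECT INTERVAL '1' YEAR > INTERVAL '11' MONTH").show()

// Collection and conditional functions: the result type widens to
// "interval year to month" / "interval day to minute" (see the golden files below).
spark.sql("SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)").show()
spark.sql("SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)").show()
```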

### How was this patch tested?
Added UT

Closes #33661 from AngersZhuuuu/SPARK-SPARK-36431.

Authored-by: Angerszhuuuu <angers.zhu@gmail.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
AngersZhuuuu authored and MaxGekk committed Aug 10, 2021
1 parent 7f56b73 commit 89d8a4e
Showing 6 changed files with 258 additions and 8 deletions.
@@ -120,6 +120,11 @@ object AnsiTypeCoercion extends TypeCoercionBase {
      case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
        Some(TimestampType)

      case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
        Some(DayTimeIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
      case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
        Some(YearMonthIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))

      case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
    }

@@ -867,6 +867,11 @@ object TypeCoercion extends TypeCoercionBase {
    case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
      Some(TimestampType)

    case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
      Some(DayTimeIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))
    case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
      Some(YearMonthIntervalType(t1.startField.min(t2.startField), t1.endField.max(t2.endField)))

    case (_: TimestampNTZType, _: DateType) | (_: DateType, _: TimestampNTZType) =>
      Some(TimestampNTZType)

@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.analysis

import java.sql.Timestamp
import java.time.{Duration, Period}

import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
@@ -1604,6 +1605,52 @@ class TypeCoercionSuite extends AnalysisTest {
    ruleTest(TypeCoercion.IntegralDivision, IntegralDivide(2, 1L),
      IntegralDivide(Cast(2, LongType), 1L))
  }

  test("SPARK-36431: Support TypeCoercion of ANSI intervals with different fields") {
    DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym1 =>
      DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym2 =>
        val literal1 = Literal.create(Period.ofMonths(12), ym1)
        val literal2 = Literal.create(Period.ofMonths(12), ym2)
        val commonType = YearMonthIntervalType(
          ym1.startField.min(ym2.startField), ym1.endField.max(ym2.endField))
        if (commonType == ym1 && commonType == ym2) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(literal1, literal2))
        } else if (commonType == ym1) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(literal1, Cast(literal2, commonType)))
        } else if (commonType == ym2) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(Cast(literal1, commonType), literal2))
        } else {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
        }
      }
    }

    DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt1 =>
      DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt2 =>
        val literal1 = Literal.create(Duration.ofSeconds(1111), dt1)
        val literal2 = Literal.create(Duration.ofSeconds(1111), dt2)
        val commonType = DayTimeIntervalType(
          dt1.startField.min(dt2.startField), dt1.endField.max(dt2.endField))
        if (commonType == dt1 && commonType == dt2) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(literal1, literal2))
        } else if (commonType == dt1) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(literal1, Cast(literal2, commonType)))
        } else if (commonType == dt2) {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(Cast(literal1, commonType), literal2))
        } else {
          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
            EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
        }
      }
    }
  }
}


13 changes: 13 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -322,3 +322,16 @@ SELECT INTERVAL '153722867280' MINUTE;
SELECT INTERVAL '-153722867280' MINUTE;
SELECT INTERVAL '54.775807' SECOND;
SELECT INTERVAL '-54.775807' SECOND;

SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR;
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND;
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH;
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH;
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;

SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211


-- !query
@@ -818,10 +818,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' DAY):map<int,interval day>>
-- !query
select map(1, interval 1 year, 2, interval 2 month)
-- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year to month>>
-- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data type mismatch: The given values of function map should all be the same type, but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}


-- !query
@@ -1985,3 +1984,94 @@ SELECT INTERVAL '-54.775807' SECOND
struct<INTERVAL '-54.775807' SECOND:interval second>
-- !query output
-0 00:00:54.775807000


-- !query
SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
-- !query schema
struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
-- !query output
true


-- !query
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
-- !query schema
struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND):boolean>
-- !query output
false


-- !query
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
-- !query schema
struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
-- !query output
false


-- !query
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
-- !query schema
struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
-- !query output
true


-- !query
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' (interval month and interval day).; line 1 pos 7


-- !query
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
-- !query schema
struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>
-- !query output
[1-0,0-1]


-- !query
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
-- !query schema
struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval day to minute>>
-- !query output
[1 00:00:00.000000000,0 01:01:00.000000000]


-- !query
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function array should all be the same type, but it's [interval month, interval day]; line 1 pos 7


-- !query
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
-- !query schema
struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
-- !query output
1-0


-- !query
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
-- !query schema
struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval day to minute>
-- !query output
1 00:00:00.000000000


-- !query
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
98 changes: 94 additions & 4 deletions sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211


-- !query
@@ -817,10 +817,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' DAY):map<int,interval day>>
-- !query
select map(1, interval 1 year, 2, interval 2 month)
-- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year to month>>
-- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data type mismatch: The given values of function map should all be the same type, but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}


-- !query
@@ -1984,3 +1983,94 @@ SELECT INTERVAL '-54.775807' SECOND
struct<INTERVAL '-54.775807' SECOND:interval second>
-- !query output
-0 00:00:54.775807000


-- !query
SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
-- !query schema
struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
-- !query output
true


-- !query
SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
-- !query schema
struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND):boolean>
-- !query output
false


-- !query
SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
-- !query schema
struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
-- !query output
false


-- !query
SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
-- !query schema
struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
-- !query output
true


-- !query
SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' (interval month and interval day).; line 1 pos 7


-- !query
SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
-- !query schema
struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to month>>
-- !query output
[1-0,0-1]


-- !query
SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
-- !query schema
struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval day to minute>>
-- !query output
[1 00:00:00.000000000,0 01:01:00.000000000]


-- !query
SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function array should all be the same type, but it's [interval month, interval day]; line 1 pos 7


-- !query
SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
-- !query schema
struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
-- !query output
1-0


-- !query
SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
-- !query schema
struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval day to minute>
-- !query output
1 00:00:00.000000000


-- !query
SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type mismatch: input to function coalesce should all be the same type, but it's [interval month, interval day]; line 1 pos 7
