Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-27638][SQL]: Cast string to date/timestamp in binary comparisons with dates/timestamps #24567

Closed
wants to merge 7 commits into from
Expand Up @@ -120,13 +120,14 @@ object TypeCoercion {
*/
private def findCommonTypeForBinaryComparison(
dt1: DataType, dt2: DataType, conf: SQLConf): Option[DataType] = (dt1, dt2) match {
// When the legacy flag `conf.castDateTimestampToString` is true, cast timestamp/date vs. string
// comparisons into string comparisons, because timestamp strings sort lexicographically,
// e.g. TimeStamp(2013-01-01 00:00 ...) < "2014" = true.
// Otherwise the common type is the date/timestamp type, so the string side is cast instead.
case (StringType, DateType) => Some(StringType)
case (DateType, StringType) => Some(StringType)
case (StringType, TimestampType) => Some(StringType)
case (TimestampType, StringType) => Some(StringType)
case (StringType, DateType)
=> if (conf.castDateTimestampToString) Some(StringType) else Some(DateType)
case (DateType, StringType)
=> if (conf.castDateTimestampToString) Some(StringType) else Some(DateType)
case (StringType, TimestampType)
=> if (conf.castDateTimestampToString) Some(StringType) else Some(TimestampType)
case (TimestampType, StringType)
=> if (conf.castDateTimestampToString) Some(StringType) else Some(TimestampType)
case (StringType, NullType) => Some(StringType)
case (NullType, StringType) => Some(StringType)

Expand Down
Expand Up @@ -1760,6 +1760,13 @@ object SQLConf {
.internal()
.intConf
.createWithDefault(Int.MaxValue)

val LEGACY_CAST_DATE_TIMESTAMP_TO_STRING =
buildConf("spark.sql.legacy.binaryComparison.castDateTimestampToString")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested config name: spark.sql.legacy.typeCoercion.datetimeToString

.doc("If it is set to true, date/timestamp will cast to string in binary comparisons " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also add a migration guide entry for this behavior change and mention this config there?

"with String")
.booleanConf
.createWithDefault(false)
}

/**
Expand Down Expand Up @@ -2211,6 +2218,8 @@ class SQLConf extends Serializable with Logging {
/**
 * Reads the boolean setting registered as `SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS`.
 *
 * NOTE(review): judging by the name, this controls whether the SET command refuses Spark core
 * configs; the entry's own `.doc(...)` text is not visible here, so confirm before relying on it.
 *
 * @return the current value of that configuration entry
 */
def setCommandRejectsSparkCoreConfs: Boolean = {
  val entry = SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS
  getConf(entry)
}

/**
 * Reads the legacy flag `SQLConf.LEGACY_CAST_DATE_TIMESTAMP_TO_STRING`.
 *
 * When true, binary comparisons between a date/timestamp and a string are resolved to
 * StringType; when false they are resolved to the date/timestamp type instead.
 *
 * @return the current value of that configuration entry
 */
def castDateTimestampToString: Boolean = {
  val legacyFlag = SQLConf.LEGACY_CAST_DATE_TIMESTAMP_TO_STRING
  getConf(legacyFlag)
}

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
Expand Down
Expand Up @@ -85,7 +85,7 @@ false
-- !query 10
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'
-- !query 10 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') > CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 10 output
false

Expand Down Expand Up @@ -141,9 +141,9 @@ true
-- !query 17
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'
-- !query 17 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') >= CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 17 output
false
true


-- !query 18
Expand Down Expand Up @@ -197,9 +197,9 @@ false
-- !query 24
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'
-- !query 24 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') < CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 24 output
true
false


-- !query 25
Expand Down Expand Up @@ -253,7 +253,7 @@ true
-- !query 31
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'
-- !query 31 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') <= CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 31 output
true

Expand Down