Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] {
if canImplicitlyCast(fromExp, toType, literalType) =>
simplifyNumericComparison(be, fromExp, toType, value)

case be @ BinaryComparison(
Cast(fromExp, _, timeZoneId, evalMode), date @ Literal(value, DateType))
if fromExp.dataType == StringType && value != null =>
be.withNewChildren(Seq(fromExp, Cast(date, StringType, timeZoneId, evalMode)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is tricky, as we need to be very careful about the reverse of date string parsing. Can we reference the detail of date string parsing in Cast and prove it's safe to do this optimization?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UnwrapCastInBinaryComparison class is only responsible for processing casts related to numeric types, and it is dangerous to cast date types. Can we add the related conversion of partition columns to the PruneHiveTablePartitions rule?


// As the analyzer makes sure that the list of In is already of the same data type, then the
// rule can simply check the first literal in `in.list` can implicitly cast to `toType` or not,
// and note that:
Expand Down Expand Up @@ -350,7 +355,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] {
literalType: DataType): Boolean = {
toType.sameType(literalType) &&
!fromExp.foldable &&
toType.isInstanceOf[NumericType] &&
(toType.isInstanceOf[NumericType] || toType.isInstanceOf[DateType]) &&
canUnwrapCast(fromExp.dataType, toType)
}

Expand All @@ -361,6 +366,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] {
case (IntegerType, FloatType) => false
case (LongType, FloatType) => false
case (LongType, DoubleType) => false
case (StringType, DateType) => true
case _ if from.isInstanceOf[NumericType] => Cast.canUpCast(from, to)
case _ => false
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.catalyst.optimizer

import java.sql.Date

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.dsl.expressions._
Expand Down Expand Up @@ -368,6 +370,35 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp
assertEquivalent(castInt(f4) < t, trueIfNotNull(f4))
}

test("SPARK-40610: Support unwrap date type to string type") {
val relation = LocalRelation($"a".string, $"b".string)
val dateLit = Literal.create(Date.valueOf("2023-01-01"), DateType)
val nullLit = Literal.create(null, DateType)

def assertUnwrapStringToDateType(e1: Expression, e2: Expression): Unit = {
val plan = relation.where(e1).analyze
val actual = Optimize.execute(plan)
val expected = relation.where(e2).analyze
comparePlans(actual, expected)
}

Seq(EqualTo, EqualNullSafe, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual)
.foreach { binaryComparison =>
assertUnwrapStringToDateType(
binaryComparison(Cast($"a", DateType), dateLit),
binaryComparison($"a", Cast(dateLit, StringType)))
}

assertUnwrapStringToDateType(
dateLit =!= Cast($"a", DateType),
Cast(dateLit, StringType) =!= $"a")

// Null date literal should be handled by NullPropagation
assertUnwrapStringToDateType(
Cast($"a", DateType) > nullLit,
Literal.create(null, BooleanType))
}

private def castInt(e: Expression): Expression = Cast(e, IntegerType)
private def castDouble(e: Expression): Expression = Cast(e, DoubleType)
private def castDecimal2(e: Expression): Expression = Cast(e, DecimalType(10, 4))
Expand Down