Skip to content

Commit

Permalink
[SPARK-12841][SQL] fix cast in filter
Browse files Browse the repository at this point in the history
In SPARK-10743 we wrap cast with `UnresolvedAlias` to give `Cast` a better alias if possible. However, for cases like `filter`, the `UnresolvedAlias` can't be resolved, and we don't actually need a better alias in this case. This PR moves the cast-wrapping logic to `Column.named` so that we only do it when we need an alias name.

Author: Wenchen Fan <wenchen@databricks.com>

Closes apache#10781 from cloud-fan/bug.
  • Loading branch information
cloud-fan authored and yhuai committed Jan 18, 2016
1 parent 38c3c0e commit 4f11e3f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class Analyzer(
private def assignAliases(exprs: Seq[NamedExpression]) = {
exprs.zipWithIndex.map {
case (expr, i) =>
expr transform {
expr transformUp {
case u @ UnresolvedAlias(child, optionalAliasName) => child match {
case ne: NamedExpression => ne
case e if !e.resolved => u
Expand Down
17 changes: 10 additions & 7 deletions sql/core/src/main/scala/org/apache/spark/sql/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,15 @@ class Column(protected[sql] val expr: Expression) extends Logging {

case func: UnresolvedFunction => UnresolvedAlias(func, Some(func.prettyString))

// If we have a top level Cast, there is a chance to give it a better alias, if there is a
// NamedExpression under this Cast.
case c: Cast => c.transformUp {
case Cast(ne: NamedExpression, to) => UnresolvedAlias(Cast(ne, to))
} match {
case ne: NamedExpression => ne
case other => Alias(expr, expr.prettyString)()
}

case expr: Expression => Alias(expr, expr.prettyString)()
}

Expand Down Expand Up @@ -921,13 +930,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
* @group expr_ops
* @since 1.3.0
*/
def cast(to: DataType): Column = withExpr {
expr match {
// keeps the name of expression if possible when do cast.
case ne: NamedExpression => UnresolvedAlias(Cast(expr, to))
case _ => Cast(expr, to)
}
}
def cast(to: DataType): Column = withExpr { Cast(expr, to) }

/**
* Casts the column to a different data type, using the canonical string representation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
// Checks that casting a named column keeps the column's name in the selected output.
test("SPARK-10743: keep the name of expression if possible when do cast") {
// Single-column DataFrame "i" holding 1..10, aliased as "src".
val df = (1 to 10).map(Tuple1.apply).toDF("i").as("src")
// A single cast over the qualified column should still be named "i".
assert(df.select($"src.i".cast(StringType)).columns.head === "i")
// Chained casts (String, then Integer) should also still be named "i".
assert(df.select($"src.i".cast(StringType).cast(IntegerType)).columns.head === "i")
}

test("SPARK-11301: fix case sensitivity for filter on partitioned columns") {
Expand Down Expand Up @@ -1228,4 +1229,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
checkAnswer(df.withColumn("col.a", lit("c")), Row("c", "b"))
checkAnswer(df.withColumn("col.c", lit("c")), Row("a", "b", "c"))
}

// Regression test for SPARK-12841: a cast used inside a filter predicate must work.
test("SPARK-12841: cast in filter") {
// Filters the one-row DataFrame (i = 1, j = "a") on i cast to a string equal to "1";
// the row is expected to pass the filter and come back unchanged.
checkAnswer(
Seq(1 -> "a").toDF("i", "j").filter($"i".cast(StringType) === "1"),
Row(1, "a"))
}
}

0 comments on commit 4f11e3f

Please sign in to comment.