Skip to content

Commit

Permalink
[SPARK-23087][SQL] CheckCartesianProduct too restrictive when conditi…
Browse files Browse the repository at this point in the history
…on is false/null

## What changes were proposed in this pull request?

`CheckCartesianProducts` also raises an `AnalysisException` when the join condition is always false or null. In that case it should not raise, because the result of the join is not a cartesian product.

## How was this patch tested?

Added a unit test.

Author: Marco Gaido <marcogaido91@gmail.com>

Closes #20333 from mgaido91/SPARK-23087.

(cherry picked from commit 121dc96)
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
  • Loading branch information
mgaido91 authored and gatorsmile committed Jan 21, 2018
1 parent b9c1367 commit e0ef30f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1108,15 +1108,19 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper {
*/
def isCartesianProduct(join: Join): Boolean = {
// Run the optional join condition through splitConjunctivePredicates; an absent condition
// yields Nil.
val conditions = join.condition.map(splitConjunctivePredicates).getOrElse(Nil)
// NOTE(review): the next two lines look like the pre-change body retained by the diff view
// (they match the catch-all branch below); as written here their result is discarded — confirm.
!conditions.map(_.references).exists(refs => refs.exists(join.left.outputSet.contains)
&& refs.exists(join.right.outputSet.contains))

conditions match {
// The only predicate is the literal false or a null literal of BooleanType: the join is not
// reported as a cartesian product.
case Seq(Literal.FalseLiteral) | Seq(Literal(null, BooleanType)) => false
// Otherwise the join counts as a cartesian product unless at least one predicate references
// attributes from both the left and the right output sets.
case _ => !conditions.map(_.references).exists(refs =>
refs.exists(join.left.outputSet.contains) && refs.exists(join.right.outputSet.contains))
}
}

def apply(plan: LogicalPlan): LogicalPlan =
if (SQLConf.get.crossJoinEnabled) {
plan
} else plan transform {
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, condition)
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _)
if isCartesianProduct(j) =>
throw new AnalysisException(
s"""Detected cartesian product for ${j.joinType.sql} join between logical plans
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,4 +274,18 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
checkAnswer(innerJoin, Row(1) :: Nil)
}

test("SPARK-23087: don't throw Analysis Exception in CheckCartesianProduct when join condition " +
    "is false or null") {
  // Both scenarios pass simply by producing an optimized plan without an exception being
  // thrown; no result rows are checked.

  // Scenario 1: the right side's join column "b" is a null literal.
  val ids = spark.range(10)
  val nullSide = spark.range(10).select(lit(null).as("b"))
  val analyzedNullJoin = ids.join(nullSide, $"id" === $"b", "left").queryExecution.analyzed

  spark.sessionState.executePlan(analyzedNullJoin).optimizedPlan

  // Scenario 2: the join columns are the distinct literals 1 and 2 (per the test name, this is
  // the "condition is false" case).
  val onesSide = ids.select(lit(1).as("a"))
  val twosSide = spark.range(10).select(lit(2).as("b"))
  val analyzedFalseJoin = onesSide.join(twosSide, $"a" === $"b", "left").queryExecution.analyzed

  spark.sessionState.executePlan(analyzedFalseJoin).optimizedPlan
}
}

0 comments on commit e0ef30f

Please sign in to comment.