Skip to content

Commit

Permalink
[SPARK-9292] Analysis should check that join conditions' data types are BooleanType
Browse files Browse the repository at this point in the history

This patch adds an analysis check to ensure that join conditions' data types are BooleanType. This check is necessary in order to report proper errors for non-boolean DataFrame join conditions.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #7630 from JoshRosen/SPARK-9292 and squashes the following commits:

aec6c7b [Josh Rosen] Check condition type in resolved()
75a3ea6 [Josh Rosen] Fix SPARK-9292.
  • Loading branch information
JoshRosen authored and marmbrus committed Jul 24, 2015
1 parent c8d71a4 commit c2b50d6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ trait CheckAnalysis {
s"filter expression '${f.condition.prettyString}' " +
s"of type ${f.condition.dataType.simpleString} is not a boolean.")

// Analysis check for joins: a Join carrying a condition whose data type is not
// BooleanType is reported through failAnalysis, with a message that quotes the
// offending expression (prettyString) and its actual type (simpleString).
// NOTE(review): the binder `j` is not referenced in the lines visible here — confirm
// whether it is needed.
case j @ Join(_, _, _, Some(condition)) if condition.dataType != BooleanType =>
failAnalysis(
s"join condition '${condition.prettyString}' " +
s"of type ${condition.dataType.simpleString} is not a boolean.")

case Aggregate(groupingExprs, aggregateExprs, child) =>
def checkValidAggregateExpression(expr: Expression): Unit = expr match {
case _: AggregateExpression => // OK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,10 @@ case class Join(

// Joins are only resolved if they don't introduce ambiguous expression ids.
override lazy val resolved: Boolean = {
// NOTE(review): this listing is a rendered diff with its +/- markers stripped. The
// single-line expression below looks like the pre-patch form that the multi-line
// expression after it replaces — confirm against the repository before treating
// both as live code.
childrenResolved && expressions.forall(_.resolved) && selfJoinResolved
// Same three conjuncts as above, plus a new one: the join condition must have data
// type BooleanType. Assuming `condition` is an Option (TODO confirm), `forall` is
// true when no condition is present, so condition-less joins are unaffected.
childrenResolved &&
expressions.forall(_.resolved) &&
selfJoinResolved &&
condition.forall(_.dataType == BooleanType)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ class AnalysisErrorSuite extends SparkFunSuite with BeforeAndAfter {
testRelation.where(Literal(1)),
"filter" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)

// Error case: joins testRelation with itself using the non-boolean condition Literal(1).
// The final list presumably holds fragments that the reported error message must
// contain — confirm against errorTest's definition, which is not visible here.
errorTest(
"non-boolean join conditions",
testRelation.join(testRelation, condition = Some(Literal(1))),
"condition" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)

errorTest(
"missing group by",
testRelation2.groupBy('a)('b),
Expand Down

0 comments on commit c2b50d6

Please sign in to comment.