Skip to content

Commit

Permalink
[SPARK-21826][SQL] outer broadcast hash join should not throw NPE
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This is a bug introduced by https://github.com/apache/spark/pull/11274/files#diff-7adb688cbfa583b5711801f196a074bbL274 .

Non-equal join condition should only be applied when the equal-join condition matches.

## How was this patch tested?

regression test

Author: Wenchen Fan <wenchen@databricks.com>

Closes #19036 from cloud-fan/bug.
  • Loading branch information
cloud-fan authored and hvanhovell committed Aug 24, 2017
1 parent 183d4cb commit 2dd37d8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,8 @@ case class BroadcastHashJoinExec(
s"""
|boolean $conditionPassed = true;
|${eval.trim}
|${ev.code}
|if ($matched != null) {
| ${ev.code}
| $conditionPassed = !${ev.isNull} && ${ev.value};
|}
""".stripMargin
Expand Down
20 changes: 20 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.spark.sql

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.language.existentials

Expand All @@ -26,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.execution.joins._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.StructType

class JoinSuite extends QueryTest with SharedSQLContext {
import testImplicits._
Expand Down Expand Up @@ -767,4 +769,22 @@ class JoinSuite extends QueryTest with SharedSQLContext {
}
}
}

test("outer broadcast hash join should not throw NPE") {
withTempView("v1", "v2") {
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") {
Seq(2 -> 2).toDF("x", "y").createTempView("v1")

spark.createDataFrame(
Seq(Row(1, "a")).asJava,
new StructType().add("i", "int", nullable = false).add("j", "string", nullable = false)
).createTempView("v2")

checkAnswer(
sql("select x, y, i, j from v1 left join v2 on x = i and y < length(j)"),
Row(2, 2, null, null)
)
}
}
}
}

0 comments on commit 2dd37d8

Please sign in to comment.