From 2dd37d827f2e443dcb3eaf8a95437d179130d55c Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 24 Aug 2017 16:44:12 +0200 Subject: [PATCH] [SPARK-21826][SQL] outer broadcast hash join should not throw NPE ## What changes were proposed in this pull request? This is a bug introduced by https://github.com/apache/spark/pull/11274/files#diff-7adb688cbfa583b5711801f196a074bbL274 . Non-equal join condition should only be applied when the equal-join condition matches. ## How was this patch tested? regression test Author: Wenchen Fan Closes #19036 from cloud-fan/bug. --- .../joins/BroadcastHashJoinExec.scala | 2 +- .../org/apache/spark/sql/JoinSuite.scala | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index bfa1e9d49a545..2f52a089ef9bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -283,8 +283,8 @@ case class BroadcastHashJoinExec( s""" |boolean $conditionPassed = true; |${eval.trim} - |${ev.code} |if ($matched != null) { + | ${ev.code} | $conditionPassed = !${ev.isNull} && ${ev.value}; |} """.stripMargin diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 86fe09bd977af..453052a8ce191 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.language.existentials @@ -26,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.StructType class JoinSuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -767,4 +769,22 @@ class JoinSuite extends QueryTest with SharedSQLContext { } } } + + test("outer broadcast hash join should not throw NPE") { + withTempView("v1", "v2") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + Seq(2 -> 2).toDF("x", "y").createTempView("v1") + + spark.createDataFrame( + Seq(Row(1, "a")).asJava, + new StructType().add("i", "int", nullable = false).add("j", "string", nullable = false) + ).createTempView("v2") + + checkAnswer( + sql("select x, y, i, j from v1 left join v2 on x = i and y < length(j)"), + Row(2, 2, null, null) + ) + } + } + } }