Skip to content

Commit

Permalink
[SPARK-26576][SQL] Broadcast hint not applied to partitioned table
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Make sure broadcast hint is applied to partitioned tables.

## How was this patch tested?

- A new unit test in PruneFileSourcePartitionsSuite
- Unit test suites touched by SPARK-14581: JoinOptimizationSuite, FilterPushdownSuite, ColumnPruningSuite, and PruneFiltersSuite

Closes #23507 from jzhuge/SPARK-26576.

Closes #23530 from jzhuge/SPARK-26576-master.

Authored-by: John Zhuge <jzhuge@apache.org>
Signed-off-by: gatorsmile <gatorsmile@gmail.com>
  • Loading branch information
jzhuge authored and gatorsmile committed Jan 13, 2019
1 parent 985f966 commit 3f80071
Showing 1 changed file with 16 additions and 0 deletions.
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.hive.execution

import org.scalatest.Matchers._

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.dsl.expressions._
Expand All @@ -25,7 +27,10 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
import org.apache.spark.sql.functions.broadcast
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

Expand Down Expand Up @@ -91,4 +96,15 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te
assert(size2 < tableStats.get.sizeInBytes)
}
}

test("SPARK-26576 Broadcast hint not applied to partitioned table") {
withTable("tbl") {
withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
val df = spark.table("tbl")
val qe = df.join(broadcast(df), "p").queryExecution
qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1
}
}
}
}

0 comments on commit 3f80071

Please sign in to comment.