From 18f928e01d13e73104f2c1a1fd86e964d5bf9391 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 16 Jun 2016 11:38:55 -0700 Subject: [PATCH 1/2] added a test case --- .../hive/execution/HiveTableScanSuite.scala | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 60f8be5e0e825..9a8e00c2f293c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.Row -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.Utils -class HiveTableScanSuite extends HiveComparisonTest { +class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestHiveSingleton { createQueryTest("partition_based_table_scan_with_different_serde", """ @@ -89,4 +90,41 @@ class HiveTableScanSuite extends HiveComparisonTest { assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi")) assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi")) } + + test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") { + val view = "src" + withTempTable(view) { + spark.range(1, 5).createOrReplaceTempView(view) + val table = "table_with_partition" + withTable(table) { + sql( + s""" + |CREATE TABLE $table(id string) + |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) + """.stripMargin) + sql( + s""" + |FROM $view v + |INSERT OVERWRITE TABLE $table + |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') + |SELECT v.id + |INSERT OVERWRITE TABLE $table + |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e') + |SELECT v.id + """.stripMargin) + Seq("true", "false").foreach { hivePruning => + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) { + val plan = sql(s"SELECT id, p2 FROM $table WHERE p2 <= 'b'").queryExecution.sparkPlan + val partValues = plan.flatMap { + case p: HiveTableScanExec => p.relation.getHiveQlPartitions(p.partitionPruningPred) + } + // If the pruning predicate is used, getHiveQlPartitions should only return the + // qualified partition; Otherwise, it return all the partitions. + val expectedNumPartitions = if (hivePruning == "true") 1 else 2 + assert(partValues.length == expectedNumPartitions) + } + } + } + } + } } From 1d41162e2e5cca471f12978eac9e69d4a66f40ae Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 16 Jun 2016 12:41:10 -0700 Subject: [PATCH 2/2] improve the test case --- .../hive/execution/HiveTableScanSuite.scala | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 9a8e00c2f293c..76d3f3dbab01f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -91,6 +91,15 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi")) } + private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): Unit = { + val plan = sql(stmt).queryExecution.sparkPlan + val numPartitions = plan.collectFirst { + case p: HiveTableScanExec => + p.relation.getHiveQlPartitions(p.partitionPruningPred).length + }.getOrElse(0) + assert(numPartitions == expectedNumParts) + } + test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") { val view = "src" withTempTable(view) { @@ -105,23 +114,30 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |FROM $view v - |INSERT OVERWRITE TABLE $table + |INSERT INTO TABLE $table |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') |SELECT v.id - |INSERT OVERWRITE TABLE $table + |INSERT INTO TABLE $table |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e') |SELECT v.id """.stripMargin) + Seq("true", "false").foreach { hivePruning => withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) { - val plan = sql(s"SELECT id, p2 FROM $table WHERE p2 <= 'b'").queryExecution.sparkPlan - val partValues = plan.flatMap { - case p: HiveTableScanExec => p.relation.getHiveQlPartitions(p.partitionPruningPred) - } // If the pruning predicate is used, getHiveQlPartitions should only return the // qualified partition; Otherwise, it return all the partitions. val expectedNumPartitions = if (hivePruning == "true") 1 else 2 - assert(partValues.length == expectedNumPartitions) + checkNumScannedPartitions( + stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", expectedNumPartitions) + } + } + + Seq("true", "false").foreach { hivePruning => + withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> hivePruning) { + // If the pruning predicate does not exist, getHiveQlPartitions should always + // return all the partitions. + checkNumScannedPartitions( + stmt = s"SELECT id, p2 FROM $table WHERE id <= 3", expectedNumParts = 2) } } }