Skip to content

Commit

Permalink
[SPARK-10198] [SQL] Turn off partition verification by default
Browse files Browse the repository at this point in the history
Author: Michael Armbrust <michael@databricks.com>

Closes #8404 from marmbrus/turnOffPartitionVerification.
  • Loading branch information
marmbrus committed Aug 25, 2015
1 parent 69c9c17 commit 5c08c86
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 31 deletions.
2 changes: 1 addition & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ private[spark] object SQLConf {
doc = "When true, enable filter pushdown for ORC files.")

val HIVE_VERIFY_PARTITION_PATH = booleanConf("spark.sql.hive.verifyPartitionPath",
defaultValue = Some(true),
defaultValue = Some(false),

This comment has been minimized.

Copy link
@CaptionKid

CaptionKid Jun 27, 2019

Why does this place use false as the default value?

doc = "<TODO>")

val HIVE_METASTORE_PARTITION_PRUNING = booleanConf("spark.sql.hive.metastorePartitionPruning",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,54 @@
package org.apache.spark.sql.hive

import com.google.common.io.Files
import org.apache.spark.sql.test.SQLTestUtils

import org.apache.spark.sql.{QueryTest, _}
import org.apache.spark.util.Utils


class QueryPartitionSuite extends QueryTest {
class QueryPartitionSuite extends QueryTest with SQLTestUtils {

private lazy val ctx = org.apache.spark.sql.hive.test.TestHive
import ctx.implicits._
import ctx.sql

protected def _sqlContext = ctx

test("SPARK-5068: query data when path doesn't exist"){
val testData = ctx.sparkContext.parallelize(
(1 to 10).map(i => TestData(i, i.toString))).toDF()
testData.registerTempTable("testData")
withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) {
val testData = ctx.sparkContext.parallelize(
(1 to 10).map(i => TestData(i, i.toString))).toDF()
testData.registerTempTable("testData")

val tmpDir = Files.createTempDir()
// create the table for test
sql(s"CREATE TABLE table_with_partition(key int,value string) " +
s"PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
"SELECT key,value FROM testData")
val tmpDir = Files.createTempDir()
// create the table for test
sql(s"CREATE TABLE table_with_partition(key int,value string) " +
s"PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
"SELECT key,value FROM testData")
sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
"SELECT key,value FROM testData")

// test for the exist path
checkAnswer(sql("select key,value from table_with_partition"),
testData.toDF.collect ++ testData.toDF.collect
++ testData.toDF.collect ++ testData.toDF.collect)
// test for the exist path
checkAnswer(sql("select key,value from table_with_partition"),
testData.toDF.collect ++ testData.toDF.collect
++ testData.toDF.collect ++ testData.toDF.collect)

// delete the path of one partition
tmpDir.listFiles
.find { f => f.isDirectory && f.getName().startsWith("ds=") }
.foreach { f => Utils.deleteRecursively(f) }
// delete the path of one partition
tmpDir.listFiles
.find { f => f.isDirectory && f.getName().startsWith("ds=") }
.foreach { f => Utils.deleteRecursively(f) }

// test for after delete the path
checkAnswer(sql("select key,value from table_with_partition"),
testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect)
// test for after delete the path
checkAnswer(sql("select key,value from table_with_partition"),
testData.toDF.collect ++ testData.toDF.collect ++ testData.toDF.collect)

sql("DROP TABLE table_with_partition")
sql("DROP TABLE createAndInsertTest")
sql("DROP TABLE table_with_partition")
sql("DROP TABLE createAndInsertTest")
}
}
}

0 comments on commit 5c08c86

Please sign in to comment.