From 48297818f37a8e02cc02ba6fa9ec04fe37540aca Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Sun, 6 Dec 2020 02:56:08 -0800
Subject: [PATCH] [SPARK-33667][SQL] Respect the `spark.sql.caseSensitive`
 config while resolving partition spec in v1 `SHOW PARTITIONS`

### What changes were proposed in this pull request?
Preprocess the partition spec passed to the V1 `SHOW PARTITIONS` implementation `ShowPartitionsCommand`, normalizing the passed spec against the table's partition columns according to the case-sensitivity flag **spark.sql.caseSensitive**.

### Why are the changes needed?
V1 `SHOW PARTITIONS` is in fact case sensitive and does not respect the SQL config **spark.sql.caseSensitive**, which is false by default. For instance:
```sql
spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int)
         > USING parquet
         > PARTITIONED BY (year, month);
spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1;
spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1);
Error in query: Non-partitioning column(s) [YEAR, Month] are specified for SHOW PARTITIONS;
```
The `SHOW PARTITIONS` command must show the partition `year = 2015, month = 1` when it is specified as `YEAR = 2015, Month = 1`.

### Does this PR introduce _any_ user-facing change?
Yes. After the changes, the command above works as expected:
```sql
spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1);
year=2015/month=1
```

### How was this patch tested?
By running the affected test suites:
- `v1/ShowPartitionsSuite`
- `v2/ShowPartitionsSuite`

Closes #30615 from MaxGekk/show-partitions-case-sensitivity-test.

Authored-by: Max Gekk
Signed-off-by: Dongjoon Hyun
---
 .../spark/sql/execution/command/tables.scala  | 18 ++++++------
 .../command/ShowPartitionsSuiteBase.scala     | 28 +++++++++++++++++--
 .../command/v1/ShowPartitionsSuite.scala      |  4 ---
 .../command/v2/ShowPartitionsSuite.scala      |  4 ---
 4 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 9e3ca3c321a54..59adb7dd7e319 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -1006,20 +1006,18 @@ case class ShowPartitionsCommand(
     DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW PARTITIONS")
 
     /**
-     * Validate the partitioning spec by making sure all the referenced columns are
+     * Normalizes the partition spec w.r.t the partition columns and case sensitivity settings,
+     * and validates the spec by making sure all the referenced columns are
      * defined as partitioning columns in table definition. An AnalysisException exception is
      * thrown if the partitioning spec is invalid.
      */
-    if (spec.isDefined) {
-      val badColumns = spec.get.keySet.filterNot(table.partitionColumnNames.contains)
-      if (badColumns.nonEmpty) {
-        val badCols = badColumns.mkString("[", ", ", "]")
-        throw new AnalysisException(
-          s"Non-partitioning column(s) $badCols are specified for SHOW PARTITIONS")
-      }
-    }
+    val normalizedSpec = spec.map(partitionSpec => PartitioningUtils.normalizePartitionSpec(
+      partitionSpec,
+      table.partitionColumnNames,
+      table.identifier.quotedString,
+      sparkSession.sessionState.conf.resolver))
 
-    val partNames = catalog.listPartitionNames(tableName, spec)
+    val partNames = catalog.listPartitionNames(tableName, normalizedSpec)
     partNames.map(Row(_))
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
index 82457f96a3003..b695decdb3ec9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
@@ -21,6 +21,7 @@ import org.scalactic.source.Position
 import org.scalatest.Tag
 
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.{StringType, StructType}
 
@@ -28,7 +29,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils {
   protected def version: String
   protected def catalog: String
   protected def defaultUsing: String
-  protected def wrongPartitionColumnsError(columns: String*): String
   // Gets the schema of `SHOW PARTITIONS`
   private val showSchema: StructType = new StructType().add("partition", StringType, false)
   protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): Unit = {
@@ -94,7 +94,7 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils {
       val errMsg = intercept[AnalysisException] {
         sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)")
       }.getMessage
-      assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz")))
+      assert(errMsg.contains("abcd is not a valid partition column"))
     }
   }
 }
@@ -149,4 +149,28 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33667: case sensitivity of partition spec") {
+    withNamespace(s"$catalog.ns") {
+      sql(s"CREATE NAMESPACE $catalog.ns")
+      val t = s"$catalog.ns.part_table"
+      withTable(t) {
+        sql(s"""
+          |CREATE TABLE $t (price int, qty int, year int, month int)
+          |$defaultUsing
+          |PARTITIONED BY (year, month)""".stripMargin)
+        sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+        Seq(
+          true -> "PARTITION(year = 2015, month = 1)",
+          false -> "PARTITION(YEAR = 2015, Month = 1)"
+        ).foreach { case (caseSensitive, partitionSpec) =>
+          withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+            runShowPartitionsSql(
+              s"SHOW PARTITIONS $t $partitionSpec",
+              Row("year=2015/month=1") :: Nil)
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
index 2b2bc9e63dc82..c752a5f358bb9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
@@ -27,10 +27,6 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase {
   override def catalog: String = CatalogManager.SESSION_CATALOG_NAME
   override def defaultUsing: String = "USING parquet"
 
-  override protected def wrongPartitionColumnsError(columns: String*): String = {
-    s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are specified"
-  }
-
   test("show everything in the default database") {
     val table = "dateTable"
     withTable(table) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
index ca47a713ad604..55985a335c94b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
@@ -32,10 +32,6 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSpa
     .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName)
     .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName)
 
-  override protected def wrongPartitionColumnsError(columns: String*): String = {
-    s"${columns.head} is not a valid partition column"
-  }
-
   test("a table does not support partitioning") {
     val table = s"non_part_$catalog.tab1"
     withTable(table) {
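
For context on what `PartitioningUtils.normalizePartitionSpec` does for this fix, the following standalone Scala sketch illustrates the normalization idea: each key in the user-supplied spec is matched against the table's partition column names with the session resolver, which compares case-insensitively when **spark.sql.caseSensitive** is false, and an unmatched key raises an error. The object name, `Resolver` alias, exception type, and error text below are simplified stand-ins for illustration, not Spark's exact API.
```scala
// Illustrative sketch only: a simplified analogue of
// PartitioningUtils.normalizePartitionSpec. All names here are
// hypothetical stand-ins, not Spark's actual implementation.
object NormalizeSpecSketch {
  // Spark models name resolution as a (String, String) => Boolean function.
  type Resolver = (String, String) => Boolean
  val caseInsensitiveResolution: Resolver = (a, b) => a.equalsIgnoreCase(b)
  val caseSensitiveResolution: Resolver = (a, b) => a == b

  /** Rewrites each spec key to the matching partition column name. */
  def normalizePartitionSpec(
      spec: Map[String, String],
      partColNames: Seq[String],
      resolver: Resolver): Map[String, String] = {
    spec.map { case (key, value) =>
      val normalizedKey = partColNames.find(col => resolver(col, key)).getOrElse {
        // Spark throws an AnalysisException here; a plain exception keeps
        // the sketch self-contained.
        throw new IllegalArgumentException(s"$key is not a valid partition column")
      }
      normalizedKey -> value
    }
  }

  def main(args: Array[String]): Unit = {
    val spec = Map("YEAR" -> "2015", "Month" -> "1")
    val partCols = Seq("year", "month")
    // With the case-insensitive resolver, YEAR/Month resolve to the
    // actual partition columns year/month.
    println(normalizePartitionSpec(spec, partCols, caseInsensitiveResolution))
    // => Map(year -> 2015, month -> 1)
  }
}
```
Spark's real `normalizePartitionSpec` additionally takes the table identifier for error messages and rejects specs whose keys collapse to duplicate columns after normalization.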