Skip to content

Commit

Permalink
[SPARK-26435][SQL] Support creating partitioned table using Hive CTAS…
Browse files Browse the repository at this point in the history
… by specifying partition column names

## What changes were proposed in this pull request?

Spark SQL doesn't support creating partitioned table using Hive CTAS in SQL syntax. However it is supported by using DataFrameWriter API.

```scala
val df = Seq(("a", 1)).toDF("part", "id")
df.write.format("hive").partitionBy("part").saveAsTable("t")
```
Hive begins to support this syntax in newer version: https://issues.apache.org/jira/browse/HIVE-20241:

```
CREATE TABLE t PARTITIONED BY (part) AS SELECT 1 as id, "a" as part
```

This patch adds this support to SQL syntax.

## How was this patch tested?

Added tests.

Closes #23376 from viirya/hive-ctas-partitioned-table.

Authored-by: Liang-Chi Hsieh <viirya@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
viirya authored and cloud-fan committed Dec 27, 2018
1 parent 7c7fccf commit f89cdec
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ statement
(AS? query)? #createTable
| createTableHeader ('(' columns=colTypeList ')')?
((COMMENT comment=STRING) |
(PARTITIONED BY '(' partitionColumns=colTypeList ')') |
(PARTITIONED BY '(' partitionColumns=colTypeList ')' |
PARTITIONED BY partitionColumnNames=identifierList) |
bucketSpec |
skewSpec |
rowFormat |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1196,33 +1196,40 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {

selectQuery match {
case Some(q) =>
// Hive does not allow to use a CTAS statement to create a partitioned table.
if (tableDesc.partitionColumnNames.nonEmpty) {
val errorMessage = "A Create Table As Select (CTAS) statement is not allowed to " +
"create a partitioned table using Hive's file formats. " +
"Please use the syntax of \"CREATE TABLE tableName USING dataSource " +
"OPTIONS (...) PARTITIONED BY ...\" to create a partitioned table through a " +
"CTAS statement."
operationNotAllowed(errorMessage, ctx)
}

// Don't allow explicit specification of schema for CTAS.
if (schema.nonEmpty) {
if (dataCols.nonEmpty) {
operationNotAllowed(
"Schema may not be specified in a Create Table As Select (CTAS) statement",
ctx)
}

// When creating partitioned table with CTAS statement, we can't specify data type for the
// partition columns.
if (partitionCols.nonEmpty) {
val errorMessage = "Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table."
operationNotAllowed(errorMessage, ctx)
}

// Hive CTAS supports dynamic partition by specifying partition column names.
val partitionColumnNames =
Option(ctx.partitionColumnNames)
.map(visitIdentifierList(_).toArray)
.getOrElse(Array.empty[String])

val tableDescWithPartitionColNames =
tableDesc.copy(partitionColumnNames = partitionColumnNames)

val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0)
if (conf.convertCTAS && !hasStorageProperties) {
// At here, both rowStorage.serdeProperties and fileStorage.serdeProperties
// are empty Maps.
val newTableDesc = tableDesc.copy(
val newTableDesc = tableDescWithPartitionColNames.copy(
storage = CatalogStorageFormat.empty.copy(locationUri = locUri),
provider = Some(conf.defaultDataSourceName))
CreateTable(newTableDesc, mode, Some(q))
} else {
CreateTable(tableDesc, mode, Some(q))
CreateTable(tableDescWithPartitionColNames, mode, Some(q))
}
case None => CreateTable(tableDesc, mode, None)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.execution

import java.io.File
import java.net.URI
import java.util.Date

import scala.language.existentials

Expand All @@ -33,6 +32,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HiveExternalCatalog
Expand Down Expand Up @@ -2370,4 +2370,50 @@ class HiveDDLSuite
))
}
}

test("Hive CTAS can't create partitioned table by specifying schema") {
val err1 = intercept[ParseException] {
spark.sql(
s"""
|CREATE TABLE t (a int)
|PARTITIONED BY (b string)
|STORED AS parquet
|AS SELECT 1 as a, "a" as b
""".stripMargin)
}.getMessage
assert(err1.contains("Schema may not be specified in a Create Table As Select " +
"(CTAS) statement"))

val err2 = intercept[ParseException] {
spark.sql(
s"""
|CREATE TABLE t
|PARTITIONED BY (b string)
|STORED AS parquet
|AS SELECT 1 as a, "a" as b
""".stripMargin)
}.getMessage
assert(err2.contains("Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table"))
}

test("Hive CTAS with dynamic partition") {
Seq("orc", "parquet").foreach { format =>
withTable("t") {
withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
spark.sql(
s"""
|CREATE TABLE t
|PARTITIONED BY (b)
|STORED AS $format
|AS SELECT 1 as a, "a" as b
""".stripMargin)
checkAnswer(spark.table("t"), Row(1, "a"))

assert(spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
.partitionColumnNames === Seq("b"))
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -692,8 +692,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
|AS SELECT key, value FROM mytable1
""".stripMargin)
}.getMessage
assert(e.contains("A Create Table As Select (CTAS) statement is not allowed to " +
"create a partitioned table using Hive's file formats"))
assert(e.contains("Create Partitioned Table As Select cannot specify data type for " +
"the partition columns of the target table"))
}
}
}
Expand Down

0 comments on commit f89cdec

Please sign in to comment.