From adde008588cf8e05cf261c086201c27a8dd5584f Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Thu, 19 Jan 2017 11:15:17 +0800 Subject: [PATCH 1/7] spark-19115 --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 ++-- .../apache/spark/sql/execution/SparkSqlParser.scala | 10 +++++++--- .../apache/spark/sql/execution/command/tables.scala | 11 +++++++++-- .../apache/spark/sql/hive/HiveDDLCommandSuite.scala | 8 ++++---- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 3222a9cdc2c4e..9c64fdb968b98 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -81,8 +81,8 @@ statement rowFormat? createFileFormat? locationSpec? (TBLPROPERTIES tablePropertyList)? (AS? query)? #createHiveTable - | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier - LIKE source=tableIdentifier #createTableLike + | CREATE EXTERNAL? TABLE (IF NOT EXISTS)? target=tableIdentifier + LIKE source=tableIdentifier locationSpec? #createTableLike | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS (identifier | FOR COLUMNS identifierSeq)? #analyze | ALTER (TABLE | VIEW) from=tableIdentifier diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 41768d451261a..4517f0aeb9917 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1140,14 +1140,18 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { * * For example: * {{{ - * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name + * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name + * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) { val targetTable = visitTableIdentifier(ctx.target) val sourceTable = visitTableIdentifier(ctx.source) - CreateTableLikeCommand(targetTable, sourceTable, ctx.EXISTS != null) + val location = Option(ctx.locationSpec).map(visitLocationSpec) + if (ctx.EXTERNAL != null && location.isEmpty) { + operationNotAllowed("CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION", ctx) + } + CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 246894813c3b9..8972c274c4d67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -58,6 +58,7 @@ import org.apache.spark.util.Utils case class CreateTableLikeCommand( targetTable: TableIdentifier, sourceTable: TableIdentifier, + location: Option[String], ifNotExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { @@ -70,12 +71,18 @@ case class CreateTableLikeCommand( sourceTableDesc.provider } + val tblType = if (location.isEmpty) { + CatalogTableType.MANAGED + } else { + CatalogTableType.EXTERNAL + } + val newTableDesc = CatalogTable( 
identifier = targetTable, - tableType = CatalogTableType.MANAGED, + tableType = tblType, // We are creating a new managed table, which should not have custom table location. - storage = sourceTableDesc.storage.copy(locationUri = None), + storage = sourceTableDesc.storage.copy(locationUri = location), schema = sourceTableDesc.schema, provider = newProvider, partitionColumnNames = sourceTableDesc.partitionColumnNames, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index b67e5f6fe57a1..0ca42a8ae3d88 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -518,8 +518,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle test("create table like") { val v1 = "CREATE TABLE table1 LIKE table2" - val (target, source, exists) = parser.parsePlan(v1).collect { - case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting) + val (target, source, location, exists) = parser.parsePlan(v1).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) }.head assert(exists == false) assert(target.database.isEmpty) @@ -528,8 +528,8 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle assert(source.table == "table2") val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2" - val (target2, source2, exists2) = parser.parsePlan(v2).collect { - case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting) + val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) }.head assert(exists2) assert(target2.database.isEmpty) From 713ca9752a7e37b9e1d52dc0b3171c9c3888ab19 Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Sun, 22 Jan 2017 17:43:15 +0800 Subject: [PATCH 2/7] update test cases and comments --- .../spark/sql/execution/command/tables.scala | 9 +- .../sql/hive/execution/HiveDDLSuite.scala | 209 +++++++++++++++++- 2 files changed, 204 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 8972c274c4d67..9b87412451251 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils /** - * A command to create a MANAGED table with the same definition of the given existing table. + * A command to create a table with the same definition of the given existing table. * In the target table definition, the table comment is always empty but the column comments * are identical to the ones defined in the source table. 
* @@ -51,8 +51,8 @@ import org.apache.spark.util.Utils * * The syntax of using this command in SQL is: * {{{ - * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name + * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name + * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ case class CreateTableLikeCommand( @@ -81,7 +81,8 @@ case class CreateTableLikeCommand( CatalogTable( identifier = targetTable, tableType = tblType, - // We are creating a new managed table, which should not have custom table location. + // If location is not empty the table we are creating is a new external table + // otherwise managed table. storage = sourceTableDesc.storage.copy(locationUri = location), schema = sourceTableDesc.schema, provider = newProvider, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index edef30823b55c..ecbce5f494b2d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -826,7 +826,32 @@ class HiveDDLSuite val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, "MANAGED") + } + } + } + + test("CREATE [EXTERNAL] TABLE LIKE a temporary view LOCATION...") { + for ( i <- 0 to 1 ) { + withTempDir {tmpDir => + val sourceViewName = "tab1" + val targetTabName = "tab2" + val basePath = tmpDir.toURI + withTempView(sourceViewName) { + withTable(targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .createTempView(sourceViewName) + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION $basePath") + + val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( + TableIdentifier(sourceViewName)) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + } + } } } } @@ -847,7 +872,35 @@ class HiveDDLSuite assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.MANAGED) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, "MANAGED") + } + } + + test("CREATE [EXTERNAL] TABLE LIKE a data source table LOCATION...") { + for ( i <- 0 to 1 ) { + withTempDir { tmpDir => + val sourceTabName = "tab1" + val targetTabName = "tab2" + val basePath = tmpDir.toURI + withTable(sourceTabName, targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("json").saveAsTable(sourceTabName) + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + + val sourceTable = + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + val targetTable = + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + // The table type of the source table should be a Hive-managed data source table + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + + checkCreateTableLike(sourceTable, targetTable, 
"EXTERNAL") + } + } } } @@ -871,7 +924,38 @@ class HiveDDLSuite assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, "MANAGED") + } + } + } + + test("CREATE [EXTERNAL] TABLE LIKE an external data source table LOCATION...") { + for ( i <- 0 to 1 ) { + withTempDir { tmpDir => + val sourceTabName = "tab1" + val targetTabName = "tab2" + val basePath = tmpDir.toURI + withTable(sourceTabName, targetTabName) { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("parquet").save(path) + sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + + // The source table should be an external data source table + val sourceTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + // The table type of the source table should be an external data source table + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) + + checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + } + } } } } @@ -889,7 +973,32 @@ class HiveDDLSuite assert(sourceTable.properties.get("prop1").nonEmpty) val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, "MANAGED") + } + } + + test("CREATE [EXTERNAL] TABLE LIKE a managed Hive serde table LOCATION...") { + for ( i <- 0 to 1 ) { + val catalog = spark.sessionState.catalog + withTempDir { tmpDir => + val sourceTabName = "tab1" + val targetTabName = "tab2" + val basePath = tmpDir.toURI + withTable(sourceTabName, targetTabName) { + sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + assert(sourceTable.properties.get("prop1").nonEmpty) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + } + } } } @@ -923,11 +1032,55 @@ class HiveDDLSuite assert(sourceTable.comment == Option("Apache Spark")) val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable) + checkCreateTableLike(sourceTable, targetTable, "MANAGED") } } } + test("CREATE [EXTERNAL] TABLE LIKE an external Hive serde table LOCATION...") { + for ( i <- 0 to 1 ) { + val catalog = spark.sessionState.catalog + withTempDir { tmpDir => + val basePath = tmpDir.toURI + withTempDir { tmpDir1 => + val basePath1 = tmpDir1.toURI + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + assert(tmpDir.listFiles.isEmpty) + sql( + s""" + |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING) + |COMMENT 'Apache Spark' + 
|PARTITIONED BY (ds STRING, hr STRING) + |LOCATION '$basePath' + """.stripMargin) + for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { + sql( + s""" + |INSERT OVERWRITE TABLE $sourceTabName + |partition (ds='$ds',hr='$hr') + |SELECT 1, 'a' + """.stripMargin) + } + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath1") + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) + assert(sourceTable.comment == Option("Apache Spark")) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + } + } + } + } + + } + test("CREATE TABLE LIKE a view") { val sourceTabName = "tab1" val sourceViewName = "view" @@ -947,15 +1100,51 @@ class HiveDDLSuite val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceView, targetTable) + checkCreateTableLike(sourceView, targetTable, "MANAGED") + } + } + } + + test("CREATE [EXTERNAL] TABLE LIKE a view LOCATION...") { + for ( i <- 0 to 1 ) { + withTempDir { tmpDir => + val sourceTabName = "tab1" + val sourceViewName = "view" + val targetTabName = "tab2" + val basePath = tmpDir.toURI + withTable(sourceTabName, targetTabName) { + withView(sourceViewName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("json").saveAsTable(sourceTabName) + sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") + val tblType = if (i == 0) "" else "EXTERNAL" + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION $basePath") + + val sourceView = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceViewName, Some("default"))) + // The original source should be a VIEW with an empty path + assert(sourceView.tableType == CatalogTableType.VIEW) + assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceView, targetTable, "EXTERNAL") + } + } } } + } - private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = { - // The created table should be a MANAGED table with empty view text and original text. - assert(targetTable.tableType == CatalogTableType.MANAGED, - "the created table must be a Hive managed table") + private def checkCreateTableLike( + sourceTable: CatalogTable, + targetTable: CatalogTable, + tableType: String): Unit = { + // The created table should be a MANAGED table or EXTERNAL table with empty view text + // and original text. + val expectTableType = CatalogTableType.apply(tableType) + assert(targetTable.tableType == expectTableType, + s"the created table must be a Hive ${expectTableType.name} table") assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty, "the view text and original text in the created table must be empty") assert(targetTable.comment.isEmpty, From b80f8e66e1cbb7111c090358cabc925c6af233d2 Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Tue, 24 Jan 2017 11:13:40 +0800 Subject: [PATCH 3/7] 1. add a comment for method visitCreateTableLike in SparkSqlParser.scala file. 2. repair the error for test cases in HiveDDLSuite.scala file, sql statements lost a pair of single quotes. 
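To illustrate the quoting fix, a minimal sketch (assuming an active SparkSession named `spark`, and a pre-existing source table `tab1`; the names and the scratch directory are hypothetical, not part of this patch):

{{{
import java.nio.file.Files

// A throwaway directory to serve as the table location.
val base = Files.createTempDirectory("ctl").toUri

// Parses: the LOCATION clause takes a quoted string literal.
spark.sql(s"CREATE TABLE tab2 LIKE tab1 LOCATION '$base'")

// Without the single quotes the interpolated URI is read as bare tokens
// after LOCATION, and the statement fails with a ParseException:
// spark.sql(s"CREATE TABLE tab2 LIKE tab1 LOCATION $base")
}}}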
--- .../apache/spark/sql/execution/SparkSqlParser.scala | 1 + .../spark/sql/hive/execution/HiveDDLSuite.scala | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 4517f0aeb9917..82fbc4bd6c0de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1149,6 +1149,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val sourceTable = visitTableIdentifier(ctx.source) val location = Option(ctx.locationSpec).map(visitLocationSpec) if (ctx.EXTERNAL != null && location.isEmpty) { + // If we are creating an EXTERNAL table, then the LOCATION field is required operationNotAllowed("CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION", ctx) } CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index ecbce5f494b2d..a11d4bfddbb52 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -842,7 +842,7 @@ class HiveDDLSuite spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .createTempView(sourceViewName) val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION $basePath") + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( TableIdentifier(sourceViewName)) @@ -886,7 +886,7 @@ class HiveDDLSuite spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("json").saveAsTable(sourceTabName) val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -942,7 +942,7 @@ class HiveDDLSuite .write.format("parquet").save(path) sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") // The source table should be an external data source table val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -987,7 +987,7 @@ class HiveDDLSuite withTable(sourceTabName, targetTabName) { sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath") + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -1064,7 +1064,7 @@ class HiveDDLSuite """.stripMargin) } val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION $basePath1") + sql(s"CREATE $tblType TABLE $targetTabName 
LIKE $sourceTabName LOCATION '$basePath1'") val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -1118,7 +1118,7 @@ class HiveDDLSuite .write.format("json").saveAsTable(sourceTabName) sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION $basePath") + sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") val sourceView = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceViewName, Some("default"))) From 71f1d1202e8f95194499e4aa5168fff08cc40427 Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Wed, 8 Feb 2017 09:42:41 +0800 Subject: [PATCH 4/7] 1.remove EXTERNAL key word in sqlbase.g4 file 2.simplify the logic: if location is specified, we create an external table internally. Else, create managed table 3.update test cases --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 6 +- .../spark/sql/execution/command/tables.scala | 4 +- .../sql/hive/execution/HiveDDLSuite.scala | 223 +++++------------- 4 files changed, 63 insertions(+), 172 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 9c64fdb968b98..164b5df06746e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -81,7 +81,7 @@ statement rowFormat? createFileFormat? locationSpec? (TBLPROPERTIES tablePropertyList)? (AS? query)? #createHiveTable - | CREATE EXTERNAL? TABLE (IF NOT EXISTS)? target=tableIdentifier + | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier locationSpec? #createTableLike | ANALYZE TABLE tableIdentifier partitionSpec? COMPUTE STATISTICS (identifier | FOR COLUMNS identifierSeq)? 
#analyze diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 82fbc4bd6c0de..ca76a10f79467 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1140,7 +1140,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { * * For example: * {{{ - * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name + * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ @@ -1148,10 +1148,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { val targetTable = visitTableIdentifier(ctx.target) val sourceTable = visitTableIdentifier(ctx.source) val location = Option(ctx.locationSpec).map(visitLocationSpec) - if (ctx.EXTERNAL != null && location.isEmpty) { - // If we are creating an EXTERNAL table, then the LOCATION field is required - operationNotAllowed("CREATE EXTERNAL TABLE LIKE must be accompanied by LOCATION", ctx) - } CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9b87412451251..446c8e6de2993 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -71,6 +71,8 @@ case class CreateTableLikeCommand( sourceTableDesc.provider } + // If location is specified, we create an external table internally. + // Else create managed table. val tblType = if (location.isEmpty) { CatalogTableType.MANAGED } else { @@ -81,8 +83,6 @@ case class CreateTableLikeCommand( CatalogTable( identifier = targetTable, tableType = tblType, - // If location is not empty the table we are creating is a new external table - // otherwise managed table. 
storage = sourceTableDesc.storage.copy(locationUri = location), schema = sourceTableDesc.schema, provider = newProvider, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a11d4bfddbb52..edc1690f38fe3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -812,26 +812,8 @@ class HiveDDLSuite } } - test("CREATE TABLE LIKE a temporary view") { - val sourceViewName = "tab1" - val targetTabName = "tab2" - withTempView(sourceViewName) { - withTable(targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .createTempView(sourceViewName) - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") - - val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( - TableIdentifier(sourceViewName)) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, "MANAGED") - } - } - } - - test("CREATE [EXTERNAL] TABLE LIKE a temporary view LOCATION...") { + test("CREATE TABLE LIKE a temporary view [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { withTempDir {tmpDir => val sourceViewName = "tab1" @@ -841,42 +823,28 @@ class HiveDDLSuite withTable(targetTabName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .createTempView(sourceViewName) - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") + if (i == 0) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName ") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName " + + s"LIKE $sourceViewName LOCATION '$basePath'") + } val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( TableIdentifier(sourceViewName)) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } } } } - test("CREATE TABLE LIKE a data source table") { - val sourceTabName = "tab1" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("json").saveAsTable(sourceTabName) - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - - val sourceTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) - val targetTable = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - // The table type of the source table should be a Hive-managed data source table - assert(DDLUtils.isDatasourceTable(sourceTable)) - assert(sourceTable.tableType == CatalogTableType.MANAGED) - - checkCreateTableLike(sourceTable, targetTable, "MANAGED") - } - } - - test("CREATE [EXTERNAL] TABLE LIKE a data source table LOCATION...") { + test("CREATE TABLE LIKE a data source table [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { withTempDir { tmpDir => val sourceTabName = "tab1" @@ -885,8 +853,12 @@ class HiveDDLSuite withTable(sourceTabName, targetTabName) { spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) 
.write.format("json").saveAsTable(sourceTabName) - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + if ( i == 0 ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + } val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -898,38 +870,14 @@ class HiveDDLSuite assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.MANAGED) - checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } } } - test("CREATE TABLE LIKE an external data source table") { - val sourceTabName = "tab1" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("parquet").save(path) - sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - - // The source table should be an external data source table - val sourceTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(sourceTabName, Some("default"))) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - // The table type of the source table should be an external data source table - assert(DDLUtils.isDatasourceTable(sourceTable)) - assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - - checkCreateTableLike(sourceTable, targetTable, "MANAGED") - } - } - } - - test("CREATE [EXTERNAL] TABLE LIKE an external data source table LOCATION...") { + test("CREATE TABLE LIKE an external data source table [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { withTempDir { tmpDir => val sourceTabName = "tab1" @@ -941,8 +889,12 @@ class HiveDDLSuite spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("parquet").save(path) sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + if ( i == 0 ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + } // The source table should be an external data source table val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -953,31 +905,15 @@ class HiveDDLSuite assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } } } } - test("CREATE TABLE LIKE a managed Hive serde table") { - val catalog = spark.sessionState.catalog - val sourceTabName = "tab1" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - - val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) - assert(sourceTable.tableType == CatalogTableType.MANAGED) - 
assert(sourceTable.properties.get("prop1").nonEmpty) - val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, "MANAGED") - } - } - - test("CREATE [EXTERNAL] TABLE LIKE a managed Hive serde table LOCATION...") { + test("CREATE TABLE LIKE a managed Hive serde table [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { val catalog = spark.sessionState.catalog withTempDir { tmpDir => @@ -986,8 +922,13 @@ class HiveDDLSuite val basePath = tmpDir.toURI withTable(sourceTabName, targetTabName) { sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + + if ( i == 0 ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + } val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -996,48 +937,14 @@ class HiveDDLSuite val targetTable = catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") - } - } - } - } - - test("CREATE TABLE LIKE an external Hive serde table") { - val catalog = spark.sessionState.catalog - withTempDir { tmpDir => - val basePath = tmpDir.toURI - val sourceTabName = "tab1" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - assert(tmpDir.listFiles.isEmpty) - sql( - s""" - |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING) - |COMMENT 'Apache Spark' - |PARTITIONED BY (ds STRING, hr STRING) - |LOCATION '$basePath' - """.stripMargin) - for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { - sql( - s""" - |INSERT OVERWRITE TABLE $sourceTabName - |partition (ds='$ds',hr='$hr') - |SELECT 1, 'a' - """.stripMargin) + checkCreateTableLike(sourceTable, targetTable, createdTableType) } - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - - val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default"))) - assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - assert(sourceTable.comment == Option("Apache Spark")) - val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, "MANAGED") } } } - test("CREATE [EXTERNAL] TABLE LIKE an external Hive serde table LOCATION...") { + test("CREATE TABLE LIKE an external Hive serde table [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { val catalog = spark.sessionState.catalog withTempDir { tmpDir => @@ -1063,8 +970,14 @@ class HiveDDLSuite |SELECT 1, 'a' """.stripMargin) } - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath1'") + + if ( i == 0 ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName " + + s"LIKE $sourceTabName LOCATION '$basePath1'") + } val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -1073,7 +986,7 @@ class HiveDDLSuite val targetTable = catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable, "EXTERNAL") + checkCreateTableLike(sourceTable, 
targetTable, createdTableType) } } } @@ -1081,31 +994,8 @@ class HiveDDLSuite } - test("CREATE TABLE LIKE a view") { - val sourceTabName = "tab1" - val sourceViewName = "view" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - withView(sourceViewName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("json").saveAsTable(sourceTabName) - sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") - - val sourceView = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(sourceViewName, Some("default"))) - // The original source should be a VIEW with an empty path - assert(sourceView.tableType == CatalogTableType.VIEW) - assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceView, targetTable, "MANAGED") - } - } - } - - test("CREATE [EXTERNAL] TABLE LIKE a view LOCATION...") { + test("CREATE TABLE LIKE a view [LOCATION]...") { + var createdTableType = "MANAGED" for ( i <- 0 to 1 ) { withTempDir { tmpDir => val sourceTabName = "tab1" @@ -1117,8 +1007,13 @@ class HiveDDLSuite spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) .write.format("json").saveAsTable(sourceTabName) sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") - val tblType = if (i == 0) "" else "EXTERNAL" - sql(s"CREATE $tblType TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") + + if ( i == 0 ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") + } else { + createdTableType = "EXTERNAL" + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") + } val sourceView = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceViewName, Some("default"))) @@ -1128,7 +1023,7 @@ class HiveDDLSuite val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceView, targetTable, "EXTERNAL") + checkCreateTableLike(sourceView, targetTable, createdTableType) } } } From 9e59fb4671a8c2fe896833749d58beed1722ad7f Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Wed, 8 Feb 2017 17:41:12 +0800 Subject: [PATCH 5/7] update test cases --- .../spark/sql/hive/HiveDDLCommandSuite.scala | 34 +- .../sql/hive/execution/HiveDDLSuite.scala | 394 ++++++++++-------- 2 files changed, 241 insertions(+), 187 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 0ca42a8ae3d88..d6384913b88e9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -387,11 +387,11 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" val query3 = s""" - |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' - |COLLECTION ITEMS TERMINATED BY 'a' - |MAP KEYS TERMINATED BY 'b' - |LINES TERMINATED BY '\n' - |NULL DEFINED AS 'c' + |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' + |COLLECTION ITEMS TERMINATED BY 'a' + |MAP KEYS TERMINATED BY 'b' + |LINES TERMINATED BY '\n' + |NULL DEFINED AS 'c' """.stripMargin val (desc1, _) = 
extractTableDesc(query1) val (desc2, _) = extractTableDesc(query2) @@ -526,6 +526,7 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle assert(target.table == "table1") assert(source.database.isEmpty) assert(source.table == "table2") + assert(location.isEmpty) val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2" val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect { @@ -536,6 +537,29 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle assert(target2.table == "table1") assert(source2.database.isEmpty) assert(source2.table == "table2") + assert(location2.isEmpty) + + val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'" + val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) + }.head + assert(!exists3) + assert(target3.database.isEmpty) + assert(target3.table == "table1") + assert(source3.database.isEmpty) + assert(source3.table == "table2") + assert(location3 == Some("/spark/warehouse")) + + val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 LOCATION '/spark/warehouse'" + val (target4, source4, location4, exists4) = parser.parsePlan(v4).collect { + case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) + }.head + assert(exists4) + assert(target4.database.isEmpty) + assert(target4.table == "table1") + assert(source4.database.isEmpty) + assert(source4.table == "table2") + assert(location4 == Some("/spark/warehouse")) } test("load data") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index edc1690f38fe3..d495bd258b21e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -54,6 +54,7 @@ class HiveDDLSuite dbPath: Option[String] = None): Boolean = { val expectedTablePath = if (dbPath.isEmpty) { + hiveContext.sessionState.catalog.hiveDefaultTableFilePath(tableIdentifier) } else { new Path(new Path(dbPath.get), tableIdentifier.table).toString @@ -812,234 +813,263 @@ class HiveDDLSuite } } - test("CREATE TABLE LIKE a temporary view [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { - withTempDir {tmpDir => - val sourceViewName = "tab1" - val targetTabName = "tab2" - val basePath = tmpDir.toURI - withTempView(sourceViewName) { - withTable(targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .createTempView(sourceViewName) - if (i == 0) { - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName ") - } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName " + - s"LIKE $sourceViewName LOCATION '$basePath'") - } - - val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( - TableIdentifier(sourceViewName)) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, createdTableType) - } - } - } + test("CREATE TABLE LIKE a temporary view") { + // create table like a temporary view. + withCreateTableLikeTempView(None) + + // create table like a temporary view location ... 
+ withTempDir {tmpDir => + withCreateTableLikeTempView(Some(tmpDir.toURI.toString)) } } - test("CREATE TABLE LIKE a data source table [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { - withTempDir { tmpDir => - val sourceTabName = "tab1" - val targetTabName = "tab2" - val basePath = tmpDir.toURI - withTable(sourceTabName, targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("json").saveAsTable(sourceTabName) - if ( i == 0 ) { - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") - } + private def withCreateTableLikeTempView(location : Option[String]): Unit = { + val sourceViewName = "tab1" + val targetTabName = "tab2" + var createdTableType = CatalogTableType.MANAGED + withTempView(sourceViewName) { + withTable(targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .createTempView(sourceViewName) + if (location.isEmpty) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName ") + } else { + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName " + + s"LIKE $sourceViewName LOCATION '$location'") + } - val sourceTable = - spark.sessionState.catalog.getTableMetadata( - TableIdentifier(sourceTabName, Some("default"))) - val targetTable = - spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - // The table type of the source table should be a Hive-managed data source table - assert(DDLUtils.isDatasourceTable(sourceTable)) - assert(sourceTable.tableType == CatalogTableType.MANAGED) + val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( + TableIdentifier(sourceViewName)) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) - checkCreateTableLike(sourceTable, targetTable, createdTableType) - } + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } } - test("CREATE TABLE LIKE an external data source table [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { + test("CREATE TABLE LIKE a data source table") { + // create table like a data source table. + withCreateTableLikeDSTable(None) + + // create table like a data source table location ... 
withTempDir { tmpDir => - val sourceTabName = "tab1" - val targetTabName = "tab2" - val basePath = tmpDir.toURI - withTable(sourceTabName, targetTabName) { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("parquet").save(path) - sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") - if ( i == 0 ) { - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") - } - - // The source table should be an external data source table - val sourceTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(sourceTabName, Some("default"))) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - // The table type of the source table should be an external data source table - assert(DDLUtils.isDatasourceTable(sourceTable)) - assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - - checkCreateTableLike(sourceTable, targetTable, createdTableType) - } - } + withCreateTableLikeDSTable(Some(tmpDir.toURI.toString)) + } + } + + private def withCreateTableLikeDSTable(location : Option[String]): Unit = { + val sourceTabName = "tab1" + val targetTabName = "tab2" + var createdTableType = CatalogTableType.MANAGED + withTable(sourceTabName, targetTabName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("json").saveAsTable(sourceTabName) + if ( location.isEmpty ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$location'") } + + val sourceTable = + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + val targetTable = + spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + // The table type of the source table should be a Hive-managed data source table + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } - test("CREATE TABLE LIKE a managed Hive serde table [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { - val catalog = spark.sessionState.catalog - withTempDir { tmpDir => - val sourceTabName = "tab1" - val targetTabName = "tab2" - val basePath = tmpDir.toURI - withTable(sourceTabName, targetTabName) { - sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") + test("CREATE TABLE LIKE an external data source table") { + // CREATE TABLE LIKE an external data source table. + withCreateTableLikeExtDSTable(None) + + // CREATE TABLE LIKE an external data source table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString)) + } + } - if ( i == 0 ) { + private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = { + val sourceTabName = "tab1" + val targetTabName = "tab2" + var createdTableType = CatalogTableType.MANAGED + withTable(sourceTabName, targetTabName) { + withTempPath { dir => + val path = dir.getCanonicalPath + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("parquet").save(path) + sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") + if ( location.isEmpty ) { sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$basePath'") + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$location'") } - val sourceTable = catalog.getTableMetadata( + // The source table should be an external data source table + val sourceTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) - assert(sourceTable.tableType == CatalogTableType.MANAGED) - assert(sourceTable.properties.get("prop1").nonEmpty) - val targetTable = catalog.getTableMetadata( + val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) + // The table type of the source table should be an external data source table + assert(DDLUtils.isDatasourceTable(sourceTable)) + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) checkCreateTableLike(sourceTable, targetTable, createdTableType) } } } + + + test("CREATE TABLE LIKE a managed Hive serde table") { + // CREATE TABLE LIKE a managed Hive serde table. + withCreateTableLikeManagedHiveTable(None) + + // CREATE TABLE LIKE a managed Hive serde table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString)) + } } - test("CREATE TABLE LIKE an external Hive serde table [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { - val catalog = spark.sessionState.catalog - withTempDir { tmpDir => - val basePath = tmpDir.toURI - withTempDir { tmpDir1 => - val basePath1 = tmpDir1.toURI - val sourceTabName = "tab1" - val targetTabName = "tab2" - withTable(sourceTabName, targetTabName) { - assert(tmpDir.listFiles.isEmpty) - sql( - s""" - |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING) - |COMMENT 'Apache Spark' - |PARTITIONED BY (ds STRING, hr STRING) - |LOCATION '$basePath' + private def withCreateTableLikeManagedHiveTable(location : Option[String]): Unit = { + val sourceTabName = "tab1" + val targetTabName = "tab2" + var createdTableType = CatalogTableType.MANAGED + val catalog = spark.sessionState.catalog + withTable(sourceTabName, targetTabName) { + sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") + + if ( location.isEmpty ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$location'") + } + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + assert(sourceTable.tableType == CatalogTableType.MANAGED) + assert(sourceTable.properties.get("prop1").nonEmpty) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable, createdTableType) + } + } + + test("CREATE TABLE LIKE an external Hive serde table") { + // CREATE TABLE LIKE an external Hive serde table. + withCreateTableLikeExtHiveTable(None) + + // CREATE TABLE LIKE an external Hive serde table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString)) + } + + } + + private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = { + val catalog = spark.sessionState.catalog + var createdTableType = CatalogTableType.MANAGED + withTempDir { tmpDir => + val basePath1 = tmpDir.toURI + val sourceTabName = "tab1" + val targetTabName = "tab2" + withTable(sourceTabName, targetTabName) { + assert(tmpDir.listFiles.isEmpty) + sql( + s""" + |CREATE EXTERNAL TABLE $sourceTabName (key INT comment 'test', value STRING) + |COMMENT 'Apache Spark' + |PARTITIONED BY (ds STRING, hr STRING) + |LOCATION '$basePath1' """.stripMargin) - for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { - sql( - s""" - |INSERT OVERWRITE TABLE $sourceTabName - |partition (ds='$ds',hr='$hr') - |SELECT 1, 'a' + for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { + sql( + s""" + |INSERT OVERWRITE TABLE $sourceTabName + |partition (ds='$ds',hr='$hr') + |SELECT 1, 'a' """.stripMargin) - } - - if ( i == 0 ) { - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") - } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName " + - s"LIKE $sourceTabName LOCATION '$basePath1'") - } - - val sourceTable = catalog.getTableMetadata( - TableIdentifier(sourceTabName, Some("default"))) - assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - assert(sourceTable.comment == Option("Apache Spark")) - val targetTable = catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, createdTableType) - } } + + if ( location.isEmpty ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName") + } else { + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName " + + s"LIKE $sourceTabName LOCATION '$location'") + } + + val sourceTable = catalog.getTableMetadata( + TableIdentifier(sourceTabName, Some("default"))) + assert(sourceTable.tableType == CatalogTableType.EXTERNAL) + assert(sourceTable.comment == Option("Apache Spark")) + val targetTable = catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceTable, targetTable, createdTableType) } } + } + + test("CREATE TABLE LIKE a view") { + // CREATE TABLE LIKE a view. + withCreateTableLikeView(None) + // CREATE TABLE LIKE a view location ... 
+ withTempDir { tmpDir => + withCreateTableLikeView(Some(tmpDir.toURI.toString)) + } } - test("CREATE TABLE LIKE a view [LOCATION]...") { - var createdTableType = "MANAGED" - for ( i <- 0 to 1 ) { - withTempDir { tmpDir => - val sourceTabName = "tab1" - val sourceViewName = "view" - val targetTabName = "tab2" - val basePath = tmpDir.toURI - withTable(sourceTabName, targetTabName) { - withView(sourceViewName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) - .write.format("json").saveAsTable(sourceTabName) - sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") - - if ( i == 0 ) { - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") - } else { - createdTableType = "EXTERNAL" - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName LOCATION '$basePath'") - } - - val sourceView = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(sourceViewName, Some("default"))) - // The original source should be a VIEW with an empty path - assert(sourceView.tableType == CatalogTableType.VIEW) - assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) - val targetTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceView, targetTable, createdTableType) - } + private def withCreateTableLikeView(location : Option[String]): Unit = { + val sourceTabName = "tab1" + val sourceViewName = "view" + val targetTabName = "tab2" + var createdTableType = CatalogTableType.MANAGED + withTable(sourceTabName, targetTabName) { + withView(sourceViewName) { + spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + .write.format("json").saveAsTable(sourceTabName) + sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") + + if ( location.isEmpty ) { + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName") + } else { + createdTableType = CatalogTableType.EXTERNAL + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName LOCATION '$location'") } + + val sourceView = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(sourceViewName, Some("default"))) + // The original source should be a VIEW with an empty path + assert(sourceView.tableType == CatalogTableType.VIEW) + assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) + val targetTable = spark.sessionState.catalog.getTableMetadata( + TableIdentifier(targetTabName, Some("default"))) + + checkCreateTableLike(sourceView, targetTable, createdTableType) } } - } private def checkCreateTableLike( sourceTable: CatalogTable, targetTable: CatalogTable, - tableType: String): Unit = { + tableType: CatalogTableType): Unit = { // The created table should be a MANAGED table or EXTERNAL table with empty view text // and original text. 
- val expectTableType = CatalogTableType.apply(tableType) - assert(targetTable.tableType == expectTableType, - s"the created table must be a Hive ${expectTableType.name} table") + assert(targetTable.tableType == tableType, + s"the created table must be a Hive ${tableType.name} table") assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty, "the view text and original text in the created table must be empty") assert(targetTable.comment.isEmpty, From bb3660a34fdcb7b3aa5bab7e4216c211f1a01749 Mon Sep 17 00:00:00 2001 From: ouyangxiaochen Date: Wed, 8 Feb 2017 18:16:38 +0800 Subject: [PATCH 6/7] resolve the conflict --- .../spark/sql/execution/command/tables.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 241 +++++++++++++----- 2 files changed, 175 insertions(+), 68 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 446c8e6de2993..5d5b0707be1bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -51,7 +51,7 @@ import org.apache.spark.util.Utils * * The syntax of using this command in SQL is: * {{{ - * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name + * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name * LIKE [other_db_name.]existing_table_name [locationSpec] * }}} */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index d495bd258b21e..4ed7661aa2f0c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.types.StructType class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { - import spark.implicits._ + import testImplicits._ override def afterEach(): Unit = { try { @@ -50,11 +50,10 @@ class HiveDDLSuite } // check if the directory for recording the data of the table exists. private def tableDirectoryExists( - tableIdentifier: TableIdentifier, - dbPath: Option[String] = None): Boolean = { + tableIdentifier: TableIdentifier, + dbPath: Option[String] = None): Boolean = { val expectedTablePath = if (dbPath.isEmpty) { - hiveContext.sessionState.catalog.hiveDefaultTableFilePath(tableIdentifier) } else { new Path(new Path(dbPath.get), tableIdentifier.table).toString @@ -80,6 +79,25 @@ class HiveDDLSuite } } + test("create a hive table without schema") { + import testImplicits._ + withTempPath { tempDir => + withTable("tab1", "tab2") { + (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath) + + var e = intercept[AnalysisException] { sql("CREATE TABLE tab1 USING hive") }.getMessage + assert(e.contains("Unable to infer the schema. The schema specification is required to " + + "create the table `default`.`tab1`")) + + e = intercept[AnalysisException] { + sql(s"CREATE TABLE tab2 location '${tempDir.getCanonicalPath}'") + }.getMessage + assert(e.contains("Unable to infer the schema. 
The schema specification is required to " + + "create the table `default`.`tab2`")) + } + } + } + test("drop external tables in default database") { withTempDir { tmpDir => val tabName = "tab1" @@ -200,7 +218,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { sql("CREATE TABLE tbl(a int) PARTITIONED BY (a string)") } - assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a") + assert(e.message == "Found duplicate column(s) in table definition of `default`.`tbl`: a") } test("add/drop partition with location - managed table") { @@ -223,8 +241,8 @@ class HiveDDLSuite sql( s""" |ALTER TABLE $tab ADD - |PARTITION (ds='2008-04-08', hr=11) LOCATION '$part1Path' - |PARTITION (ds='2008-04-08', hr=12) LOCATION '$part2Path' + |PARTITION (ds='2008-04-08', hr=11) LOCATION '${part1Path.toURI}' + |PARTITION (ds='2008-04-08', hr=12) LOCATION '${part2Path.toURI}' """.stripMargin) assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty)) @@ -523,9 +541,9 @@ class HiveDDLSuite assume(oldPart.storage.properties.filterKeys(expectedSerdeProps.contains) != expectedSerdeProps, "bad test: serde properties were already set") sql(s"""ALTER TABLE boxes PARTITION (width=4) - | SET SERDE '$expectedSerde' - | WITH SERDEPROPERTIES ($expectedSerdePropsString) - |""".stripMargin) + | SET SERDE '$expectedSerde' + | WITH SERDEPROPERTIES ($expectedSerdePropsString) + |""".stripMargin) val newPart = catalog.getPartition(TableIdentifier("boxes"), Map("width" -> "4")) assert(newPart.storage.serde == Some(expectedSerde)) assume(newPart.storage.properties.filterKeys(expectedSerdeProps.contains) == @@ -657,8 +675,9 @@ class HiveDDLSuite assert(sql("DESC FORMATTED view1").collect().containsSlice( Seq( Row("# View Information", "", ""), - Row("View Original Text:", "SELECT * FROM tbl", ""), - Row("View Expanded Text:", "SELECT * FROM tbl", "") + Row("View Text:", "SELECT * FROM tbl", ""), + Row("View Default Database:", "default", ""), + Row("View Query Output Columns:", "[a]", "") ) )) } @@ -709,8 +728,8 @@ class HiveDDLSuite } test("create/drop database - location without pre-created directory") { - withTempPath { tmpDir => - createDatabaseWithLocation(tmpDir, dirExists = false) + withTempPath { tmpDir => + createDatabaseWithLocation(tmpDir, dirExists = false) } } @@ -854,8 +873,8 @@ class HiveDDLSuite withCreateTableLikeDSTable(None) // create table like a data source table location ... 
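+    // (Some(tmpDir.toURI.toString) below supplies a file: URI string for the LOCATION
+    // clause, so the created copy is expected to be EXTERNAL and rooted in the temp dir.)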
-  withTempDir { tmpDir =>
-      withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
+    withTempDir { tmpDir =>
+      withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
     }
   }
 
-  private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = {
-    val sourceTabName = "tab1"
-    val targetTabName = "tab2"
-    var createdTableType = CatalogTableType.MANAGED
-    withTable(sourceTabName, targetTabName) {
-      withTempPath { dir =>
-        val path = dir.getCanonicalPath
-        spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
-          .write.format("parquet").save(path)
-        sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
-        if ( location.isEmpty ) {
-          sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
-        } else {
-          createdTableType = CatalogTableType.EXTERNAL
-          sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '$location'")
-        }
-
-        // The source table should be an external data source table
-        val sourceTable = spark.sessionState.catalog.getTableMetadata(
-          TableIdentifier(sourceTabName, Some("default")))
-        val targetTable = spark.sessionState.catalog.getTableMetadata(
-          TableIdentifier(targetTabName, Some("default")))
-        // The table type of the source table should be an external data source table
-        assert(DDLUtils.isDatasourceTable(sourceTable))
-        assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
-
-        checkCreateTableLike(sourceTable, targetTable, createdTableType)
+  private def withCreateTableLikeExtDSTable(location: Option[String]): Unit = {
+    val sourceTabName = "tab1"
+    val targetTabName = "tab2"
+    var createdTableType = CatalogTableType.MANAGED
+    withTable(sourceTabName, targetTabName) {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
+          .write.format("parquet").save(path)
+        sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
+        if (location.isEmpty) {
+          sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+        } else {
+          createdTableType = CatalogTableType.EXTERNAL
+          sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName LOCATION '${location.get}'")
         }
+
+        // The source table should be an external data source table
+        val sourceTable = spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(sourceTabName, Some("default")))
+        val targetTable = spark.sessionState.catalog.getTableMetadata(
+          TableIdentifier(targetTabName, Some("default")))
+        // The table type of the source table should be an external data source table
+        assert(DDLUtils.isDatasourceTable(sourceTable))
+        assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
+
+        checkCreateTableLike(sourceTable, targetTable, createdTableType)
       }
     }
-
+  }
+
   test("CREATE TABLE LIKE a managed Hive serde table") {
     // CREATE TABLE LIKE a managed Hive serde table.
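+    // (Expected to behave like the variants above: only the presence of a LOCATION
+    // clause decides whether the copied definition is MANAGED or EXTERNAL.)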
@@ -973,7 +991,6 @@ class HiveDDLSuite withTempDir { tmpDir => withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString)) } - } private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = { @@ -1053,7 +1070,9 @@ class HiveDDLSuite TableIdentifier(sourceViewName, Some("default"))) // The original source should be a VIEW with an empty path assert(sourceView.tableType == CatalogTableType.VIEW) - assert(sourceView.viewText.nonEmpty && sourceView.viewOriginalText.nonEmpty) + assert(sourceView.viewText.nonEmpty) + assert(sourceView.viewDefaultDatabase == Some("default")) + assert(sourceView.viewQueryColumnNames == Seq("a", "b", "c", "d")) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) @@ -1063,15 +1082,19 @@ class HiveDDLSuite } private def checkCreateTableLike( - sourceTable: CatalogTable, - targetTable: CatalogTable, - tableType: CatalogTableType): Unit = { + sourceTable: CatalogTable, + targetTable: CatalogTable, + tableType: CatalogTableType): Unit = { // The created table should be a MANAGED table or EXTERNAL table with empty view text // and original text. assert(targetTable.tableType == tableType, s"the created table must be a Hive ${tableType.name} table") - assert(targetTable.viewText.isEmpty && targetTable.viewOriginalText.isEmpty, - "the view text and original text in the created table must be empty") + assert(targetTable.viewText.isEmpty, + "the view text in the created table must be empty") + assert(targetTable.viewDefaultDatabase.isEmpty, + "the view default database in the created table must be empty") + assert(targetTable.viewQueryColumnNames.isEmpty, + "the view query output columns in the created table must be empty") assert(targetTable.comment.isEmpty, "the comment in the created table must be empty") assert(targetTable.unsupportedFeatures.isEmpty, @@ -1098,7 +1121,7 @@ class HiveDDLSuite "the table properties of source tables should not be copied in the created table") if (DDLUtils.isDatasourceTable(sourceTable) || - sourceTable.tableType == CatalogTableType.VIEW) { + sourceTable.tableType == CatalogTableType.VIEW) { assert(DDLUtils.isDatasourceTable(targetTable), "the target table should be a data source table") } else { @@ -1299,7 +1322,7 @@ class HiveDDLSuite assert(e2.getMessage.contains(forbiddenPrefix + "foo")) val e3 = intercept[AnalysisException] { - sql(s"CREATE TABLE tbl TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") + sql(s"CREATE TABLE tbl (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") } assert(e3.getMessage.contains(forbiddenPrefix + "foo")) } @@ -1374,9 +1397,9 @@ class HiveDDLSuite withTempPath { path => sql( s""" - |CREATE TABLE t(id int) USING hive - |OPTIONS(fileFormat 'orc', compression 'Zlib') - |LOCATION '${path.getCanonicalPath}' + |CREATE TABLE t(id int) USING hive + |OPTIONS(fileFormat 'orc', compression 'Zlib') + |LOCATION '${path.toURI}' """.stripMargin) val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) assert(DDLUtils.isHiveTable(table)) @@ -1433,7 +1456,7 @@ class HiveDDLSuite } test("create hive serde table with DataFrameWriter.saveAsTable") { - withTable("t", "t2") { + withTable("t", "t1") { Seq(1 -> "a").toDF("i", "j") .write.format("hive").option("fileFormat", "avro").saveAsTable("t") checkAnswer(spark.table("t"), Row(1, "a")) @@ -1464,17 +1487,8 @@ class HiveDDLSuite assert(table.storage.serde == Some("org.apache.hadoop.hive.serde2.avro.AvroSerDe")) - sql("INSERT INTO t SELECT 2, 'b'") - 
checkAnswer(spark.table("t"), Row(9, "x") :: Row(2, "b") :: Nil) - - val e = intercept[AnalysisException] { - Seq(1 -> "a").toDF("i", "j").write.format("hive").partitionBy("i").saveAsTable("t2") - } - assert(e.message.contains("A Create Table As Select (CTAS) statement is not allowed " + - "to create a partitioned table using Hive")) - val e2 = intercept[AnalysisException] { - Seq(1 -> "a").toDF("i", "j").write.format("hive").bucketBy(4, "i").saveAsTable("t2") + Seq(1 -> "a").toDF("i", "j").write.format("hive").bucketBy(4, "i").saveAsTable("t1") } assert(e2.message.contains("Creating bucketed Hive serde table is not supported yet")) @@ -1485,6 +1499,51 @@ class HiveDDLSuite } } + test("append data to hive serde table") { + withTable("t", "t1") { + Seq(1 -> "a").toDF("i", "j") + .write.format("hive").option("fileFormat", "avro").saveAsTable("t") + checkAnswer(spark.table("t"), Row(1, "a")) + + sql("INSERT INTO t SELECT 2, 'b'") + checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Nil) + + Seq(3 -> "c").toDF("i", "j") + .write.format("hive").mode("append").saveAsTable("t") + checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) + + Seq("c" -> 3).toDF("i", "j") + .write.format("hive").mode("append").saveAsTable("t") + checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Row(3, "c") + :: Row(null, "3") :: Nil) + + Seq(4 -> "d").toDF("i", "j").write.saveAsTable("t1") + + val e = intercept[AnalysisException] { + Seq(5 -> "e").toDF("i", "j") + .write.format("hive").mode("append").saveAsTable("t1") + } + assert(e.message.contains("The format of the existing table default.t1 is " + + "`ParquetFileFormat`. It doesn't match the specified format `HiveFileFormat`.")) + } + } + + test("create partitioned hive serde table as select") { + withTable("t", "t1") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + Seq(10 -> "y").toDF("i", "j").write.format("hive").partitionBy("i").saveAsTable("t") + checkAnswer(spark.table("t"), Row("y", 10) :: Nil) + + Seq((1, 2, 3)).toDF("i", "j", "k").write.mode("overwrite").format("hive") + .partitionBy("j", "k").saveAsTable("t") + checkAnswer(spark.table("t"), Row(1, 2, 3) :: Nil) + + spark.sql("create table t1 using hive partitioned by (i) as select 1 as i, 'a' as j") + checkAnswer(spark.table("t1"), Row("a", 1) :: Nil) + } + } + } + test("read/write files with hive data source is not allowed") { withTempDir { dir => val e = intercept[AnalysisException] { @@ -1496,6 +1555,54 @@ class HiveDDLSuite Seq(1 -> "a").toDF("i", "j").write.format("hive").save(dir.getAbsolutePath) } assert(e2.message.contains("Hive data source can only be used with tables")) + + val e3 = intercept[AnalysisException] { + spark.readStream.format("hive").load(dir.getAbsolutePath) + } + assert(e3.message.contains("Hive data source can only be used with tables")) + + val e4 = intercept[AnalysisException] { + spark.readStream.schema(new StructType()).parquet(dir.getAbsolutePath) + .writeStream.format("hive").start(dir.getAbsolutePath) + } + assert(e4.message.contains("Hive data source can only be used with tables")) + } + } + + test("partitioned table should always put partition columns at the end of table schema") { + def getTableColumns(tblName: String): Seq[String] = { + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tblName)).schema.map(_.name) + } + + withTable("t", "t1", "t2", "t3", "t4", "t5", "t6") { + sql("CREATE TABLE t(a int, b int, c int, d int) USING parquet PARTITIONED BY (d, b)") + assert(getTableColumns("t") == 
Seq("a", "c", "d", "b"))
+
+    sql("CREATE TABLE t1 USING parquet PARTITIONED BY (d, b) AS SELECT 1 a, 1 b, 1 c, 1 d")
+    assert(getTableColumns("t1") == Seq("a", "c", "d", "b"))
+
+    Seq((1, 1, 1, 1)).toDF("a", "b", "c", "d").write.partitionBy("d", "b").saveAsTable("t2")
+    assert(getTableColumns("t2") == Seq("a", "c", "d", "b"))
+
+    withTempPath { path =>
+      val dataPath = new File(new File(path, "d=1"), "b=1").getCanonicalPath
+      Seq(1 -> 1).toDF("a", "c").write.save(dataPath)
+
+      sql(s"CREATE TABLE t3 USING parquet LOCATION '${path.getCanonicalPath}'")
+      assert(getTableColumns("t3") == Seq("a", "c", "d", "b"))
+    }
+
+    sql("CREATE TABLE t4(a int, b int, c int, d int) USING hive PARTITIONED BY (d, b)")
+    assert(getTableColumns("t4") == Seq("a", "c", "d", "b"))
+
+    withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
+      sql("CREATE TABLE t5 USING hive PARTITIONED BY (d, b) AS SELECT 1 a, 1 b, 1 c, 1 d")
+      assert(getTableColumns("t5") == Seq("a", "c", "d", "b"))
+
+      Seq((1, 1, 1, 1)).toDF("a", "b", "c", "d").write.format("hive")
+        .partitionBy("d", "b").saveAsTable("t6")
+      assert(getTableColumns("t6") == Seq("a", "c", "d", "b"))
+    }
   }
 }

From 5dd21b28e46b42f2af853b48b014f05d7d6b7895 Mon Sep 17 00:00:00 2001
From: ouyangxiaochen
Date: Wed, 8 Feb 2017 18:25:35 +0800
Subject: [PATCH 7/7] update test cases

---
 .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 4ed7661aa2f0c..53a0eaf373219 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -833,10 +833,10 @@ class HiveDDLSuite
   }
 
   test("CREATE TABLE LIKE a temporary view") {
-    // create table like a temporary view.
+    // CREATE TABLE LIKE a temporary view.
     withCreateTableLikeTempView(None)
 
-    // create table like a temporary view location ...
+    // CREATE TABLE LIKE a temporary view with LOCATION ...
     withTempDir { tmpDir =>
       withCreateTableLikeTempView(Some(tmpDir.toURI.toString))
     }
@@ -869,10 +869,10 @@ class HiveDDLSuite
   }
 
   test("CREATE TABLE LIKE a data source table") {
-    // create table like a data source table.
+    // CREATE TABLE LIKE a data source table.
     withCreateTableLikeDSTable(None)
 
-    // create table like a data source table location ...
+    // CREATE TABLE LIKE a data source table with LOCATION ...
     withTempDir { tmpDir =>
       withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
     }
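
As an end-to-end sketch of the behavior exercised by the tests above (the table names and
the path are examples only; a Hive-enabled SparkSession named `spark` is assumed):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.catalog.CatalogTableType

    // A source table whose definition will be copied.
    spark.range(5).write.saveAsTable("src")

    // Without LOCATION, the copy is expected to be a MANAGED table under the warehouse dir.
    spark.sql("CREATE TABLE managed_copy LIKE src")

    // With LOCATION, the copy is expected to be an EXTERNAL table rooted at the given path.
    spark.sql("CREATE TABLE external_copy LIKE src LOCATION '/tmp/external_copy'")

    val catalog = spark.sessionState.catalog
    assert(catalog.getTableMetadata(TableIdentifier("managed_copy")).tableType ==
      CatalogTableType.MANAGED)
    assert(catalog.getTableMetadata(TableIdentifier("external_copy")).tableType ==
      CatalogTableType.EXTERNAL)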