From 49e272db15d1635a8d4927efa6c51968db18d29c Mon Sep 17 00:00:00 2001
From: sandeep-katta
Date: Tue, 25 Sep 2018 18:31:43 +0530
Subject: [PATCH 1/3] RootCause: When a database is dropped, all the data
 related to it is deleted

Modification content: Handled review comments
---
 .../sql/catalyst/catalog/SessionCatalog.scala |  8 +++++++
 .../sql/execution/command/DDLSuite.scala      | 21 +++++++++++++------
 .../HiveThriftServer2Suites.scala             |  1 +
 .../sql/hive/execution/HiveDDLSuite.scala     | 13 ++++++++++++
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 74559f5d88796..5d8d34e6d9411 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -208,6 +208,14 @@ class SessionCatalog(
         "you cannot create a database with this name.")
     }
     validateName(dbName)
+    // SPARK-25464 fail if DB location exists and is not empty
+    val dbPath = new Path(dbDefinition.locationUri)
+    val fs = dbPath.getFileSystem(hadoopConf)
+    if (!externalCatalog.databaseExists(dbName) && fs.exists(dbPath)
+      && fs.listStatus(dbPath).nonEmpty) {
+      throw new AnalysisException(
+        s"Cannot create database at location $dbPath because the path is not empty.")
+    }
     val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri)
     externalCatalog.createDatabase(
       dbDefinition.copy(name = dbName, locationUri = qualifiedPath),

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index b777db750a1bb..c3f19db5a89ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -834,12 +834,21 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
   }
 
   test("create table in default db") {
-    val catalog = spark.sessionState.catalog
-    val tableIdent1 = TableIdentifier("tab1", None)
-    createTable(catalog, tableIdent1)
-    val expectedTableIdent = tableIdent1.copy(database = Some("default"))
-    val expectedTable = generateTable(catalog, expectedTableIdent)
-    checkCatalogTables(expectedTable, catalog.getTableMetadata(tableIdent1))
+    var tablePath: URI = null
+    try {
+      val catalog = spark.sessionState.catalog
+      val tableIdent1 = TableIdentifier("tab1", None)
+      createTable(catalog, tableIdent1)
+      val expectedTableIdent = tableIdent1.copy(database = Some("default"))
+      val expectedTable = generateTable(catalog, expectedTableIdent)
+      tablePath = expectedTable.location
+      checkCatalogTables(expectedTable, catalog.getTableMetadata(tableIdent1))
+    } finally {
+      // This is an external table, so its data must be deleted explicitly
+      if (null != tablePath) {
+        Utils.deleteRecursively(new File(tablePath))
+      }
+    }
   }
 
   test("create table in a specific db") {

diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index 9c53e9018668d..25ca3f4d44f92 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -416,6 +416,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest {
         statement.execute("USE db1")
         // access test_map2
         statement.executeQuery("SELECT key from test_map2")
+        statement.execute("DROP DATABASE db1 CASCADE")
       }
     )
   }

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index a907fcae526c0..509ab8b591566 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2516,4 +2516,17 @@ class HiveDDLSuite
       }
     }
   }
+
+  test("SPARK-25464 create a database with a non empty location") {
+    val dbName = "dbwithcustomlocation"
+    withTempDir { tmpDir =>
+      val parentDir = tmpDir.getParent
+      val expectedMsg = s"Cannot create database at location $parentDir because the path is not " +
+        "empty."
+      val e = intercept[AnalysisException] {
+        sql(s"CREATE DATABASE $dbName Location '$parentDir' ")
+      }.getMessage
+      assert(e.contains(expectedMsg))
+    }
+  }
 }
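The check added above is easiest to understand from the SQL surface. A minimal sketch of the resulting behavior, assuming a running `spark` session; the paths `/tmp/emptydir` and `/tmp/nonempty` are purely illustrative:

    // Path does not exist or is empty: database creation succeeds as before.
    spark.sql("CREATE DATABASE db_ok LOCATION '/tmp/emptydir'")

    // Path exists and already contains files: the patched SessionCatalog now
    // throws before touching the external catalog, e.g.
    //   org.apache.spark.sql.AnalysisException: Cannot create database at
    //   location /tmp/nonempty because the path is not empty.
    spark.sql("CREATE DATABASE db_bad LOCATION '/tmp/nonempty'")

Note that the guard only fires when the database does not already exist in the external catalog, so re-creating catalog metadata over an existing database location is unaffected.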
From b2ec189205b0fe6463283bd805f10b77ddb4ff25 Mon Sep 17 00:00:00 2001
From: sandeep-katta
Date: Thu, 7 Mar 2019 11:55:54 +0530
Subject: [PATCH 2/3] RootCause: When a database is dropped, all the data
 related to it is deleted

Modification content: Updated the sql-migration-guide to notify the user
about the behavior change
---
 docs/sql-migration-guide-upgrade.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md
index b2bd8cefc3f96..6626faeefe3ae 100644
--- a/docs/sql-migration-guide-upgrade.md
+++ b/docs/sql-migration-guide-upgrade.md
@@ -31,6 +31,8 @@ license: |
 
   - In Spark version 2.4 and earlier, SQL queries such as `FROM <table>` or `FROM <table> UNION ALL FROM <table>` are supported by accident. In hive-style `FROM <table> SELECT <expr>`, the `SELECT` clause is not negligible. Neither Hive nor Presto support this syntax. Therefore we will treat these queries as invalid since Spark 3.0.
 
+  - Since Spark 3.0, creating a database with a non-empty location is not allowed; an exception is thrown when attempting to create a database at a non-empty location.
+
   - Since Spark 3.0, the Dataset and DataFrame API `unionAll` is not deprecated any more. It is an alias for `union`.
 
   - In PySpark, when creating a `SparkSession` with `SparkSession.builder.getOrCreate()`, if there is an existing `SparkContext`, the builder was trying to update the `SparkConf` of the existing `SparkContext` with configurations specified to the builder, but the `SparkContext` is shared by all `SparkSession`s, so we should not update them. Since 3.0, the builder comes to not update the configurations. This is the same behavior as Java/Scala API in 2.3 and above. If you want to update them, you need to update them prior to creating a `SparkSession`.
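For users upgrading, the practical consequence of this migration note is that pointing CREATE DATABASE at a populated directory must become a deliberate step. A sketch of one way to prepare such a location first; the paths, database name, and the `spark` handle are assumptions for illustration, not part of the patch:

    import org.apache.hadoop.fs.Path

    // Clear the intended database location up front (destructive; illustrative
    // only), or simply choose a fresh, empty directory instead.
    val location = new Path("/user/hive/warehouse/mydb.db")
    val fs = location.getFileSystem(spark.sparkContext.hadoopConfiguration)
    if (fs.exists(location)) {
      fs.delete(location, true) // recursive delete
    }
    spark.sql("CREATE DATABASE mydb LOCATION '/user/hive/warehouse/mydb.db'")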
From 0a52bdcce263d4aa647b9b517bcc02d95a7569ab Mon Sep 17 00:00:00 2001
From: sandeep katta
Date: Mon, 12 Aug 2019 21:06:22 +0530
Subject: [PATCH 3/3] review comments fixed

---
 .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +-
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala     | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 5d8d34e6d9411..45e9de5446b30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -214,7 +214,7 @@ class SessionCatalog(
     if (!externalCatalog.databaseExists(dbName) && fs.exists(dbPath)
       && fs.listStatus(dbPath).nonEmpty) {
       throw new AnalysisException(
-        s"Cannot create database at location $dbPath because the path is not empty.")
+        s"Cannot create database at location $dbPath as the path already exists.")
     }
     val qualifiedPath = makeQualifiedPath(dbDefinition.locationUri)
     externalCatalog.createDatabase(

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 509ab8b591566..6b194c913009c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2521,8 +2521,7 @@ class HiveDDLSuite
     val dbName = "dbwithcustomlocation"
     withTempDir { tmpDir =>
       val parentDir = tmpDir.getParent
-      val expectedMsg = s"Cannot create database at location $parentDir because the path is not " +
-        "empty."
+      val expectedMsg = s"Cannot create database at location $parentDir as the path already exists."
       val e = intercept[AnalysisException] {
         sql(s"CREATE DATABASE $dbName Location '$parentDir' ")
       }.getMessage
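Taken together, the series converges on a simple rule: creation fails only when the target location both exists and has children, so an existing but empty directory remains acceptable. A self-contained sketch of that predicate; the helper name `isNonEmptyDir` is ours, not part of the patch:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path

    // Mirrors the guard in SessionCatalog.createDatabase: only a path that
    // exists AND already contains entries blocks database creation.
    def isNonEmptyDir(location: String, hadoopConf: Configuration): Boolean = {
      val path = new Path(location)
      val fs = path.getFileSystem(hadoopConf)
      fs.exists(path) && fs.listStatus(path).nonEmpty
    }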