Skip to content

Commit 0dc0722

Browse files
authored
[Spark] Handle concurrent CREATE TABLE IF NOT EXISTS ... LIKE ... table commands (#3306)
#### Which Delta project/connector is this regarding? - [x] Spark - [ ] Standalone - [ ] Flink - [ ] Kernel - [ ] Other (fill in here) ## Description When two or more CREATE TABLE IF NOT EXISTS commands run concurrently, each of them can conclude that the table does not exist yet, and every command after the first then fails with a TABLE_ALREADY_EXISTS error. With this PR, we make sure that such a later command ends up as a no-op instead of a failure. ## How was this patch tested? UTs ## Does this PR introduce _any_ user-facing changes? No
1 parent 4482ee3 commit 0dc0722

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

spark/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ case class CreateDeltaTableCommand(
603603
case TableCreationModes.Create =>
604604
spark.sessionState.catalog.createTable(
605605
cleaned,
606-
ignoreIfExists = existingTableOpt.isDefined,
606+
ignoreIfExists = existingTableOpt.isDefined || mode == SaveMode.Ignore,
607607
validateLocation = false)
608608
case TableCreationModes.Replace | TableCreationModes.CreateOrReplace
609609
if existingTableOpt.isDefined =>

spark/src/test/scala/org/apache/spark/sql/delta/DeltaCreateTableLikeSuite.scala

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@
1717
package org.apache.spark.sql.delta
1818

1919
import java.io.File
20+
import java.net.URI
21+
import java.util.UUID
2022

23+
import org.apache.spark.sql.delta.catalog.DeltaCatalog
24+
import org.apache.spark.sql.delta.commands.{CreateDeltaTableCommand, TableCreationModes}
2125
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
2226
import org.scalatest.exceptions.TestFailedException
2327

2428
import org.apache.spark.sql.QueryTest
29+
import org.apache.spark.sql.SaveMode
2530
import org.apache.spark.sql.catalyst.TableIdentifier
31+
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
2632
import org.apache.spark.sql.functions.lit
2733
import org.apache.spark.sql.test.SharedSparkSession
34+
import org.apache.spark.sql.types.StructType
2835

2936
class DeltaCreateTableLikeSuite extends QueryTest
3037
with SharedSparkSession
@@ -277,6 +284,42 @@ class DeltaCreateTableLikeSuite extends QueryTest
277284
}
278285
}
279286

287+
// Regression test: runs the same Create-mode command with SaveMode.Ignore twice in a row,
// both times with existingTableOpt = None, and checks that table `t` is in the session
// catalog after each run. The second run must not throw for the test to pass.
test("concurrent create Managed Catalog table commands should not fail") {
288+
withTempDir { dir =>
289+
withTable("t") {
290+
// Builds a MANAGED delta CatalogTable named `t` with a single `id: long` column.
// Every call uses a new random UUID sub-path of `dir` as the table location, so the
// two commands below do not share a location.
def getCatalogTable: CatalogTable = {
291+
val storage = CatalogStorageFormat.empty.copy(
292+
locationUri = Some(new URI(s"$dir/${UUID.randomUUID().toString}")))
293+
val catalogTableTarget = CatalogTable(
294+
identifier = TableIdentifier("t"),
295+
tableType = CatalogTableType.MANAGED,
296+
storage = storage,
297+
provider = Some("delta"),
298+
schema = new StructType().add("id", "long"))
299+
// NOTE(review): presumably this normalizes/validates the descriptor the same way the
// catalog does before a real CREATE TABLE - confirm against DeltaCatalog.
new DeltaCatalog()
300+
.verifyTableAndSolidify(
301+
tableDesc = catalogTableTarget,
302+
query = None,
303+
maybeClusterBySpec = None)
304+
}
305+
// First command: `t` has not been created inside this withTable scope yet.
CreateDeltaTableCommand(
306+
getCatalogTable,
307+
existingTableOpt = None,
308+
mode = SaveMode.Ignore,
309+
query = None,
310+
operation = TableCreationModes.Create).run(spark)
311+
assert(spark.sessionState.catalog.tableExists(TableIdentifier("t")))
312+
// Second command: `t` is now in the catalog (asserted above), but the command is still
// told that no table exists, as if it had checked before the first command committed.
CreateDeltaTableCommand(
313+
getCatalogTable,
314+
existingTableOpt = None, // Set to None to simulate concurrent table creation commands.
315+
mode = SaveMode.Ignore,
316+
query = None,
317+
operation = TableCreationModes.Create).run(spark)
318+
assert(spark.sessionState.catalog.tableExists(TableIdentifier("t")))
319+
}
320+
}
321+
}
322+
280323
test("CREATE TABLE LIKE where sourceTable is a json table") {
281324
val srcTbl = "srcTbl"
282325
val targetTbl = "targetTbl"

0 commit comments

Comments
 (0)