From f8f871635081d7bcd95ea44db8dda76cf853c148 Mon Sep 17 00:00:00 2001
From: Prashant Wason
Date: Thu, 30 Apr 2026 22:19:19 -0700
Subject: [PATCH] fix: filter EXTERNAL property in SparkCatalogMetaStoreClient.toCatalogTable

Hudi's `HMSDDLExecutor.createTable` sets both `tableType=EXTERNAL_TABLE`
and `parameters[EXTERNAL]=TRUE` on the Hive Table object when the table
is external. When that Table flows through `SparkCatalogMetaStoreClient`
into `HiveExternalCatalog`, `verifyTableProperties` rejects it with:

    AnalysisException: Cannot set or change the preserved property key: 'EXTERNAL'

Spark uses `CatalogTableType.EXTERNAL` on the `CatalogTable` itself to
encode external-ness, and treats `EXTERNAL=...` as a duplicate (and
forbidden) encoding. We already map `tableType` correctly via
`if ("EXTERNAL_TABLE".equalsIgnoreCase(table.getTableType))`, so dropping
the property in the same filter that already strips `spark.sql.*` is safe.

Same family as #18654 (filter `spark.sql.*`).

Adds a regression test mirroring the real `HMSDDLExecutor` shape:
`tableType=EXTERNAL_TABLE` AND `parameters[EXTERNAL]=TRUE`.
Co-Authored-By: Claude Opus 4.7 --- .../hive/SparkCatalogMetaStoreClient.scala | 9 ++++++- .../TestSparkCatalogMetaStoreClient.scala | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala index bd1133e5c31ab..9fa225eeb8767 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala @@ -311,8 +311,15 @@ class SparkCatalogMetaStoreClient(syncConfig: HiveSyncConfig) // table property keys may not start with 'spark.sql.'") because they are reserved for // Spark's internal use (provider, schema parts, create version). Spark re-derives and // writes these from the CatalogTable itself, so dropping them on the way in is safe. + // + // Also strip "EXTERNAL". HMSDDLExecutor.createTable sets both + // `tableType=EXTERNAL_TABLE` and `parameters[EXTERNAL]=TRUE`. Spark's + // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a property key + // ("Cannot set or change the preserved property key: 'EXTERNAL'") because it controls + // table type via CatalogTableType instead. The tableType field below already encodes + // that information, so dropping the property is safe. 
val tableProperties = Option(table.getParameters).map(_.asScala.toMap).getOrElse(Map.empty) - .filterNot { case (k, _) => k.startsWith("spark.sql.") } + .filterNot { case (k, _) => k.startsWith("spark.sql.") || k == "EXTERNAL" } CatalogTable( identifier = TableIdentifier(tbl, Some(db)), diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala index e6385ee935281..93b2be295e3de 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala @@ -177,6 +177,32 @@ class TestSparkCatalogMetaStoreClient extends FunSuite with BeforeAndAfterAll { } } + test("createTable accepts EXTERNAL=TRUE parameter (mirrors HMSDDLExecutor behavior)") { + withTempDir { tmp => + val client = newClient() + val databaseName = generateName("db") + val tableName = generateName("tbl") + + client.createDatabase(new Database(databaseName, "test database", new File(tmp, databaseName).toURI.toString, new util.HashMap[String, String]())) + + // Hudi's HMSDDLExecutor.createTable sets BOTH `tableType=EXTERNAL_TABLE` and + // `parameters[EXTERNAL]=TRUE` on the Hive Table object. Spark's + // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a property key + // unless we strip it in toCatalogTable. This test mirrors that real-world shape. 
+ val createdTable = newTable( + databaseName, + tableName, + new File(tmp, tableName).toURI.toString, + Seq("id" -> "int", "name" -> "string"), + Seq("dt" -> "string"), + Map("EXTERNAL" -> "TRUE", "comment" -> "v1")) + + client.createTable(createdTable) + assertTrue(client.tableExists(databaseName, tableName)) + assertEquals("v1", client.getTable(databaseName, tableName).getParameters.get("comment")) + } + } + private def newClient(): SparkCatalogMetaStoreClient = { SparkSession.setActiveSession(spark) SparkSession.setDefaultSession(spark)