[SPARK-47101][SQL] Allow comma to be used in top-level column names and remove check nested type definition in HiveExternalCatalog.verifyDataSchema
#45180
Changes from all commits: bca98a8, 4cc8e1e, 4f98793, 4b7a3c7, 30cabaf, a9061f6, 8464a1e
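For context, the behavior change the title describes, as a hedged sketch (illustrative table and column names, derived from the test changes below; assumes a spark-shell with Hive support):

// Illustrative sketch, not code from this PR.
// Before SPARK-47101: HiveExternalCatalog.verifyDataSchema rejected the comma
// with INVALID_HIVE_COLUMN_NAME. After: the statement succeeds, since the
// backticks make the comma part of the top-level column name.
spark.sql("CREATE TABLE demo STORED AS parquet SELECT 1 AS `a,b`")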
@@ -34,10 +34,10 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
 import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
+import org.apache.spark.sql.errors.DataTypeErrors.toSQLType
 import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
 import org.apache.spark.sql.execution.datasources.orc.OrcCompressionCodec
 import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetFooterReader}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
 import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET}
 import org.apache.spark.sql.hive.orc.OrcFileOperator
@@ -2876,22 +2876,39 @@ class HiveDDLSuite
     }
   }

-  test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") {
-    Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName =>
+  test("SPARK-47101 checks if nested column names do not include invalid characters") {
+    // delimiter characters
+    Seq(",", ":").foreach { c =>
+      val typ = s"array<struct<`abc${c}xyz`:int>>"
+      // The regex is from HiveClientImpl.getSparkSQLDataType, please keep them in sync.
+      val replaced = typ.replaceAll("`", "").replaceAll("(?<=struct<|,)([^,<:]+)(?=:)", "`$1`")
+      withTable("t") {
+        checkError(
+          exception = intercept[SparkException] {
+            sql(s"CREATE TABLE t (a $typ) USING hive")
+          },
+          errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE",
+          parameters = Map(
+            "fieldType" -> toSQLType(replaced),
+            "fieldName" -> "`a`")
+        )
+      }
+    }
+    // other special characters
+    Seq(";", "^", "\\", "/", "%").foreach { c =>
+      val typ = s"array<struct<`abc${c}xyz`:int>>"
+      val replaced = typ.replaceAll("`", "")
+      val msg = s"java.lang.IllegalArgumentException: Error: : expected at the position " +
+        s"16 of '$replaced' but '$c' is found."
       withTable("t") {
         checkError(
           exception = intercept[AnalysisException] {
-            spark.range(1)
-              .select(struct(lit(0).as(nestedColumnName)).as("toplevel"))
-              .write
-              .format("hive")
-              .saveAsTable("t")
+            sql(s"CREATE TABLE t (a $typ) USING hive")
           },
-          errorClass = "INVALID_HIVE_COLUMN_NAME",
+          errorClass = "_LEGACY_ERROR_TEMP_3065",
           parameters = Map(
-            "invalidChars" -> "',', ':', ';'",
-            "tableName" -> "`spark_catalog`.`default`.`t`",
-            "columnName" -> s"`$nestedColumnName`")
+            "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
+            "msg" -> msg)
         )
       }
     }

Review comments on this hunk:

On `sql(s"CREATE TABLE t (a $typ) USING hive")`:
Contributor: For parquet tables, do we still have this error?
Author: Still fine.

On `errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE"`:
Contributor: Just for my education, where do we throw this error?
Author: In HiveClientImpl.getSparkSQLDataType.
Member: Got it. We can reuse the existing error class in this case.

On `errorClass = "_LEGACY_ERROR_TEMP_3065"`:
Member: Why does this PR switch from INVALID_HIVE_COLUMN_NAME to _LEGACY_ERROR_TEMP_3065? Can we exclude the deletion of INVALID_HIVE_COLUMN_NAME from this PR?
Author: INVALID_HIVE_COLUMN_NAME is not necessary anymore: 1) the restrictions for column names have been removed in this PR; 2) nested field names belong to the data type part. For these two reasons, INVALID_HIVE_COLUMN_NAME can be removed. _LEGACY_ERROR_TEMP_3065 is thrown when the underlying Hive call fails; it wraps the org.apache.hadoop.hive.ql.metadata.HiveException seen in the expected message above.
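Where "position 16" in the expected message comes from, as a standalone check (plain Scala, not from the PR): the de-quoted type string starts with `array<struct<abc`, which is 16 characters, so Hive's type parser reports the offending character at offset 16, where it expected a `:`.

// Not from the PR: reproduces the position arithmetic in the expected message.
val c = ";"
val replaced = s"array<struct<abc${c}xyz:int>>" // backticks already stripped
val pos = replaced.indexOf(c)                   // "array<struct<abc".length == 16
println(s"Error: : expected at the position $pos of '$replaced' but '$c' is found.")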
@@ -3385,24 +3402,11 @@ class HiveDDLSuite
     }
   }

-  test("SPARK-44911: Create the table with invalid column") {
+  test("SPARK-47101: comma is allowed in column name") {
     val tbl = "t1"
     withTable(tbl) {
-      val e = intercept[AnalysisException] {
-        sql(
-          s"""
-            |CREATE TABLE t1
-            |STORED AS parquet
-            |SELECT id, DATE'2018-01-01' + MAKE_DT_INTERVAL(0, id) FROM RANGE(0, 10)
-          """.stripMargin)
-      }
-      checkError(e,
-        errorClass = "INVALID_HIVE_COLUMN_NAME",
-        parameters = Map(
-          "invalidChars" -> "','",
-          "tableName" -> "`spark_catalog`.`default`.`t1`",
-          "columnName" -> "`DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)`")
-      )
+      sql("CREATE TABLE t1 STORED AS parquet SELECT id as `a,b` FROM range(1)")
+      checkAnswer(sql("SELECT * FROM t1"), Row(0))
     }
   }
 }

Review comments on this hunk:

On the simplified CREATE TABLE statement:
Member: Thank you for adding this simpler version.
Author: Hi @dongjoon-hyun, this is changed via request from @cloud-fan: #45180 (comment)
Member: Ah, got it~
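For readers wondering how such a column is referenced afterwards, a sketch assuming an active `spark` session and the `t1` table created above (not part of the PR): in SQL the name must stay backtick-quoted, while the Dataset API needs no quoting because the name is not parsed as SQL.

// Not from the PR: addressing a comma-named column after creation.
val df = spark.table("t1")
df.select(df("a,b")).show()
spark.sql("SELECT `a,b` FROM t1").show()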
Review thread on the `replaceAll` regex in the SPARK-47101 nested-column test:
Reviewer: Does this replace rule come from Hive? Can we have a link?
Author: OK
Reviewer: I feel it's clearer to write the string literal of the replaced value, instead of using this complex regex.
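Echoing that suggestion, a standalone sketch (plain Scala, not from the PR) printing the string literal the regex yields for the comma case, i.e. the value that reaches `toSQLType(replaced)`:

// Not from the PR: the literal output of the re-quoting regex for the "," case.
// Stripping the backticks lets the comma split the field name, so the regex
// re-quotes only the fragment sitting directly in front of the ':'.
val typ = "array<struct<`abc,xyz`:int>>"
val replaced = typ
  .replaceAll("`", "")
  .replaceAll("(?<=struct<|,)([^,<:]+)(?=:)", "`$1`")
println(replaced) // prints: array<struct<abc,`xyz`:int>>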