apache · srielau · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Nov 3, 2025
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -2030,7 +2030,7 @@
   },
   "IDENTIFIER_TOO_MANY_NAME_PARTS" : {
     "message" : [
-      "<identifier> is not a valid identifier as it has more than 2 name parts."
+      "<identifier> is not a valid identifier as it has more than <limit> name parts."
     ],
     "sqlState" : "42601"
   },
@@ -8539,11 +8539,6 @@
       "Failed to merge incompatible schemas <left> and <right>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_2096" : {
-    "message" : [
-      "<ddl> is not supported temporarily."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2097" : {
     "message" : [
       "Could not execute broadcast in <timeout> secs. You can increase the timeout for broadcasts via <broadcastTimeout> or disable broadcast join by setting <autoBroadcastJoinThreshold> to -1 or remove the broadcast hint if it exists in your code."

diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -46,6 +46,13 @@ options { tokenVocab = SqlBaseLexer; }
    * When true, parameter markers are allowed everywhere a literal is supported.
    */
   public boolean parameter_substitution_enabled = true;
+
+  /**
+   * When false (default), IDENTIFIER('literal') is resolved to an identifier at parse time (identifier-lite).
+   * When true, only the legacy IDENTIFIER(expression) function syntax is allowed.
+   * Controlled by spark.sql.legacy.identifierClause configuration.
+   */
+  public boolean legacy_identifier_clause_only = false;
 }
 
 compoundOrSingleStatement
@@ -92,7 +99,7 @@ sqlStateValue
     ;
 
 declareConditionStatement
-    : DECLARE multipartIdentifier CONDITION (FOR SQLSTATE VALUE? sqlStateValue)?
+    : DECLARE strictIdentifier CONDITION (FOR SQLSTATE VALUE? sqlStateValue)?
     ;
 
 conditionValue
@@ -125,11 +132,11 @@ repeatStatement
     ;
 
 leaveStatement
-    : LEAVE multipartIdentifier
+    : LEAVE strictIdentifier
     ;
 
 iterateStatement
-    : ITERATE multipartIdentifier
+    : ITERATE strictIdentifier
     ;
 
 caseStatement
@@ -144,19 +151,19 @@ loopStatement
     ;
 
 forStatement
-    : beginLabel? FOR (multipartIdentifier AS)? query DO compoundBody END FOR endLabel?
+    : beginLabel? FOR (strictIdentifier AS)? query DO compoundBody END FOR endLabel?
     ;
 
 singleStatement
     : (statement|setResetStatement) SEMICOLON* EOF
     ;
 
 beginLabel
-    : multipartIdentifier COLON
+    : strictIdentifier COLON
     ;
 
 endLabel
-    : multipartIdentifier
+    : strictIdentifier
     ;
 
 singleExpression
@@ -321,7 +328,7 @@ statement
     | SHOW VIEWS ((FROM | IN) identifierReference)?
         (LIKE? pattern=stringLit)?                                        #showViews
     | SHOW PARTITIONS identifierReference partitionSpec?               #showPartitions
-    | SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)?
+    | SHOW functionScope=simpleIdentifier? FUNCTIONS ((FROM | IN) ns=identifierReference)?
         (LIKE? (legacy=multipartIdentifier | pattern=stringLit))?      #showFunctions
     | SHOW PROCEDURES ((FROM | IN) identifierReference)?               #showProcedures
     | SHOW CREATE TABLE identifierReference (AS SERDE)?                #showCreateTable
@@ -833,8 +840,8 @@ hint
     ;
 
 hintStatement
-    : hintName=identifier
-    | hintName=identifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN
+    : hintName=simpleIdentifier
+    | hintName=simpleIdentifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN
     ;
 
 fromClause
@@ -1241,7 +1248,7 @@ primaryExpression
     | identifier                                                                               #columnReference
     | base=primaryExpression DOT fieldName=identifier                                          #dereference
     | LEFT_PAREN expression RIGHT_PAREN                                                        #parenthesizedExpression
-    | EXTRACT LEFT_PAREN field=identifier FROM source=valueExpression RIGHT_PAREN              #extract
+    | EXTRACT LEFT_PAREN field=simpleIdentifier FROM source=valueExpression RIGHT_PAREN              #extract
     | (SUBSTR | SUBSTRING) LEFT_PAREN str=valueExpression (FROM | COMMA) pos=valueExpression
       ((FOR | COMMA) len=valueExpression)? RIGHT_PAREN                                         #substring
     | TRIM LEFT_PAREN trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)?
@@ -1297,7 +1304,7 @@ constant
     ;
 
 namedParameterMarker
-    : COLON identifier
+    : COLON simpleIdentifier
     ;
 comparisonOperator
     : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
@@ -1599,13 +1606,32 @@ identifier
     | {!SQL_standard_keyword_behavior}? strictNonReserved
     ;
 
+// simpleIdentifier: like identifier but without IDENTIFIER('literal') support
+// Use this for contexts where IDENTIFIER() syntax is not appropriate, e.g.:
+//   - Named parameters (:param_name)
+//   - Extract field names (EXTRACT(field FROM ...))
+//   - Hint names and the scope word of SHOW <scope> FUNCTIONS
+simpleIdentifier
+    : simpleStrictIdentifier
+    | {!SQL_standard_keyword_behavior}? strictNonReserved
+    ;
+
 strictIdentifier
     : IDENTIFIER              #unquotedIdentifier
     | quotedIdentifier        #quotedIdentifierAlternative
+    | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN  #identifierLiteral
     | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
     | {!SQL_standard_keyword_behavior}? nonReserved    #unquotedIdentifier
     ;
 
+// simpleStrictIdentifier: like strictIdentifier but without IDENTIFIER('literal') support
+simpleStrictIdentifier
+    : IDENTIFIER              #simpleUnquotedIdentifier
+    | quotedIdentifier        #simpleQuotedIdentifierAlternative
+    | {SQL_standard_keyword_behavior}? ansiNonReserved #simpleUnquotedIdentifier
+    | {!SQL_standard_keyword_behavior}? nonReserved    #simpleUnquotedIdentifier
+    ;
+
 quotedIdentifier
     : BACKQUOTED_IDENTIFIER
     | {double_quoted_identifiers}? DOUBLEQUOTED_STRING

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -20,15 +20,15 @@ import java.util.Locale
 
 import scala.jdk.CollectionConverters._
 
-import org.antlr.v4.runtime.Token
+import org.antlr.v4.runtime.{ParserRuleContext, Token}
 import org.antlr.v4.runtime.tree.ParseTree
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.util.CollationFactory
 import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
 import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
-import org.apache.spark.sql.errors.QueryParsingErrors
+import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors}
 import org.apache.spark.sql.internal.SqlApiConf
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
 
@@ -60,12 +60,52 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType,
  *
  * @see
  *   [[org.apache.spark.sql.catalyst.parser.AstBuilder]] for the full SQL statement parser
+ *
+ * ==CRITICAL: Extracting Identifier Names==
+ *
+ * When extracting identifier names from parser contexts, you MUST use the helper methods provided
+ * by this class instead of calling ctx.getText() directly:
+ *
+ *   - '''getIdentifierText(ctx)''': For single identifiers (column names, aliases, window names)
+ *   - '''getIdentifierParts(ctx)''': For qualified identifiers (table names, schema.table)
+ *
+ * '''DO NOT use ctx.getText() or ctx.identifier.getText()''' directly! These methods do not
+ * handle the IDENTIFIER('literal') syntax and will cause incorrect behavior.
+ *
+ * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse time
+ * (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), you'll
+ * get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature.
+ *
+ * Example:
+ * {{{
+ *   // WRONG - does not handle IDENTIFIER('literal'):
+ *   val name = ctx.identifier.getText
+ *   SubqueryAlias(ctx.name.getText, plan)
+ *
+ *   // CORRECT - handles both regular identifiers and IDENTIFIER('literal'):
+ *   val name = getIdentifierText(ctx.identifier)
+ *   SubqueryAlias(getIdentifierText(ctx.name), plan)
+ * }}}
  */
-class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
+class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeErrorsBase {
   protected def typedVisit[T](ctx: ParseTree): T = {
     ctx.accept(this).asInstanceOf[T]
   }
 
+  /**
+   * Publicly accessible entry point to [[getIdentifierParts]], which is protected. It performs no
+   * work of its own: the context is forwarded unchanged and the result returned as-is, so
+   * utility code outside this class hierarchy (e.g. ParserUtils) can resolve identifiers,
+   * including the IDENTIFIER('literal') form, without re-implementing that logic.
+   *
+   * @param ctx
+   *   The parser context holding the identifier to resolve.
+   * @return
+   *   The identifier's name parts, exactly as produced by getIdentifierParts.
+   */
+  def extractIdentifierParts(ctx: ParserRuleContext): Seq[String] =
+    getIdentifierParts(ctx)
+
   override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
     typedVisit[DataType](ctx.dataType)
   }
@@ -161,11 +201,89 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
   }
 
   /**
-   * Create a multi-part identifier.
+   * Parse a string into a multi-part identifier. Subclasses should override this method to
+   * provide proper multi-part identifier parsing with access to a full SQL parser.
+   *
+   * For example, in AstBuilder, this would parse "`catalog`.`schema`.`table`" into Seq("catalog",
+   * "schema", "table").
+   *
+   * NOTE: getIdentifierParts invokes this hook for every IDENTIFIER('literal') it encounters,
+   * whether or not the literal is qualified (IDENTIFIER('col') as well as
+   * IDENTIFIER('schema.table')). A builder that does not override it therefore fails with the
+   * internal error below as soon as any IDENTIFIER('literal') reaches getIdentifierParts.
+   *
+   * @param identifier
+   *   The identifier string to parse, potentially containing dots and backticks.
+   * @return
+   *   Sequence of identifier parts.
+   */
+  protected def parseMultipartIdentifier(identifier: String): Seq[String] = {
+    throw SparkException.internalError(
+      "parseMultipartIdentifier must be overridden by subclasses. " +
+        s"Attempted to parse: $identifier")
+  }
+
+  /**
+   * Resolve a parser context to the name parts of the identifier it represents. Plain
+   * identifiers yield their text as a single part; IDENTIFIER('literal') (identifier-lite, folded
+   * at parse time) yields whatever parseMultipartIdentifier makes of the literal's string value,
+   * so a qualified literal such as IDENTIFIER('`catalog`.`schema`') can produce several parts.
+   *
+   * @param ctx
+   *   The context to resolve; identifier and errorCapturingIdentifier contexts are unwrapped down
+   *   to their strictIdentifier, any other context contributes its raw text.
+   * @return
+   *   The name parts, in order.
+   */
+  protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = {
+    // Fallback: the raw text as a single part (a def, so getText runs only when needed).
+    def rawText: Seq[String] = Seq(ctx.getText)
+
+    ctx match {
+      case literal: IdentifierLiteralContext =>
+        // IDENTIFIER('literal'): parseMultipartIdentifier splits the string value into parts.
+        parseMultipartIdentifier(string(visitStringLit(literal.stringLit())))
+
+      case plain: IdentifierContext =>
+        // No strictIdentifier child means the strictNonReserved alternative matched.
+        Option(plain.strictIdentifier()).fold(rawText)(getIdentifierParts)
+
+      case capturing: ErrorCapturingIdentifierContext =>
+        // Descend through identifier to strictIdentifier, which may be IDENTIFIER('literal').
+        val strict = Option(capturing.identifier()).flatMap(id => Option(id.strictIdentifier()))
+        strict.fold(rawText)(getIdentifierParts)
+
+      case _ => rawText
+    }
+  }
+
+  /**
+   * Resolve a context that must denote exactly one name part (plain identifier or
+   * IDENTIFIER('literal')) and return that part. A literal that resolves to several parts, e.g.
+   * IDENTIFIER('qualified.name'), is rejected with IDENTIFIER_TOO_MANY_NAME_PARTS (limit 1).
+   */
+  protected def getIdentifierText(ctx: ParserRuleContext): String = {
+    val nameParts = getIdentifierParts(ctx)
+    val qualified = nameParts.size > 1
+    if (!qualified) {
+      nameParts.head
+    } else {
+      val params = Map("identifier" -> toSQLId(nameParts), "limit" -> "1")
+      throw new ParseException(
+        errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", messageParameters = params, ctx)
+    }
+  }
+
+  /**
+   * Create a multi-part identifier. Handles identifier-lite with qualified identifiers like
+   * IDENTIFIER('`cat`.`schema`').table
    */
   override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] =
     withOrigin(ctx) {
-      ctx.parts.asScala.map(_.getText).toSeq
+      // Each part is an errorCapturingIdentifier (which wraps identifier).
+      // getIdentifierParts recursively handles IDENTIFIER('literal') syntax through
+      // identifier -> strictIdentifier -> identifierLiteral.
+      ctx.parts.asScala.flatMap(getIdentifierParts).toSeq
     }
 
   /**
@@ -351,7 +469,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
     }
 
     StructField(
-      name = colName.getText,
+      name = getIdentifierText(colName),
       dataType = typedVisit[DataType](ctx.dataType),
       nullable = NULL == null,
       metadata = builder.build())

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala
@@ -81,7 +81,8 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
    */
   override def visitNamedParameterLiteral(ctx: NamedParameterLiteralContext): AnyRef =
     withOrigin(ctx) {
-      val paramName = ctx.namedParameterMarker().identifier().getText
+      // Named parameters use simpleIdentifier, so .getText() is correct.
+      val paramName = ctx.namedParameterMarker().simpleIdentifier().getText
       namedParams += paramName
 
       // Calculate the location of the entire parameter (including the colon)
@@ -117,7 +118,8 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
    */
   override def visitNamedParameterMarkerRule(ctx: NamedParameterMarkerRuleContext): AnyRef =
     withOrigin(ctx) {
-      val paramName = ctx.namedParameterMarker().identifier().getText
+      // Named parameters use simpleIdentifier, so .getText() is correct.
+      val paramName = ctx.namedParameterMarker().simpleIdentifier().getText
       namedParams += paramName
 
       // Calculate the location of the entire parameter (including the colon)

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala
@@ -430,7 +430,15 @@ case class UnclosedCommentProcessor(command: String, tokenStream: CommonTokenStr
 }
 
 object DataTypeParser extends AbstractParser {
-  override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder
+  override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder {
+    // DataTypeParser only parses data types, not full SQL: every call here is rejected.
+    // NOTE(review): getIdentifierParts also calls this for single-part IDENTIFIER('x') - confirm.
+    override protected def parseMultipartIdentifier(identifier: String): Seq[String] = {
+      throw SparkException.internalError(
+        "DataTypeParser does not support multi-part identifiers in IDENTIFIER(). " +
+          s"Attempted to parse: $identifier")
+    }
+  }
 }
 
 object AbstractParser extends Logging {
@@ -476,6 +484,7 @@ object AbstractParser extends Logging {
     parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords
     parser.double_quoted_identifiers = conf.doubleQuotedIdentifiers
     parser.parameter_substitution_enabled = !conf.legacyParameterSubstitutionConstantsOnly
+    parser.legacy_identifier_clause_only = conf.legacyIdentifierClauseOnly
   }
 
   /**

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -477,7 +477,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase {
       ctx)
   }
 
-  def showFunctionsUnsupportedError(identifier: String, ctx: IdentifierContext): Throwable = {
+  def showFunctionsUnsupportedError(identifier: String, ctx: ParserRuleContext): Throwable = {
     new ParseException(
       errorClass = "INVALID_SQL_SYNTAX.SHOW_FUNCTIONS_INVALID_SCOPE",
       messageParameters = Map("scope" -> toSQLId(identifier)),

diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala
@@ -51,6 +51,7 @@ private[sql] trait SqlApiConf {
   def parserDfaCacheFlushThreshold: Int
   def parserDfaCacheFlushRatio: Double
   def legacyParameterSubstitutionConstantsOnly: Boolean
+  def legacyIdentifierClauseOnly: Boolean
 }
 
 private[sql] object SqlApiConf {
@@ -104,4 +105,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf {
   override def parserDfaCacheFlushThreshold: Int = -1
   override def parserDfaCacheFlushRatio: Double = -1.0
   override def legacyParameterSubstitutionConstantsOnly: Boolean = false
+  override def legacyIdentifierClauseOnly: Boolean = false
 }