From 4cbcebe2d1221f91255b7d91a4d34e5223b10f49 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 28 Oct 2025 06:26:02 -0700 Subject: [PATCH 01/37] IDENTIFIER everywhere --- .../resources/error/error-conditions.json | 2 +- .../sql/catalyst/parser/SqlBaseParser.g4 | 5 +- .../catalyst/parser/DataTypeAstBuilder.scala | 68 ++++++++- .../sql/catalyst/parser/AstBuilder.scala | 68 ++++++++- .../sql/errors/QueryCompilationErrors.scala | 8 +- .../sql-tests/inputs/identifier-clause.sql | 84 +++++++++++ .../apache/spark/sql/ParametersSuite.scala | 130 ++++++++++++++++++ 7 files changed, 351 insertions(+), 14 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index c3f2c49a446b..5c3f475669ce 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2000,7 +2000,7 @@ }, "IDENTIFIER_TOO_MANY_NAME_PARTS" : { "message" : [ - " is not a valid identifier as it has more than 2 name parts." + " is not a valid identifier as it has more than name parts." 
], "sqlState" : "42601" }, diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 18f262f817f8..7bf01f98489d 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -668,8 +668,8 @@ dmlStatementNoWith ; identifierReference - : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN - | multipartIdentifier + : multipartIdentifier + | IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN ; catalogIdentifierReference @@ -1591,6 +1591,7 @@ identifier strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative + | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier ; diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index e47e1e1bae0b..21c032b2a155 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -20,7 +20,7 @@ import java.util.Locale import scala.jdk.CollectionConverters._ -import org.antlr.v4.runtime.Token +import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.ParseTree import org.apache.spark.SparkException @@ -106,11 +106,71 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { Option(ctx).map(visit(_).asInstanceOf[Token]).orNull /** - * Create a multi-part identifier. + * Get the identifier parts from a context, handling both regular identifiers and + * IDENTIFIER('literal'). 
This method is used to support identifier-lite syntax where + * IDENTIFIER('string') is folded at parse time. For qualified identifiers like + * IDENTIFIER('`catalog`.`schema`'), this will parse the string and return multiple parts. + * + * Subclasses should override this method to provide actual parsing logic. + */ + protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { + ctx match { + case idLitCtx: IdentifierLiteralContext => + // For IDENTIFIER('literal'), extract the string literal value and parse it + val literalValue = string(visitStringLit(idLitCtx.stringLit())) + // This base implementation just returns the literal as a single part + // Subclasses should override to parse qualified identifiers + Seq(literalValue) + case _ => + // For regular identifiers, just return the text as a single part + Seq(ctx.getText) + } + } + + /** + * Get the text of a SINGLE identifier, handling both regular identifiers and + * IDENTIFIER('literal'). This method REQUIRES that the identifier be unqualified (single part + * only). If IDENTIFIER('qualified.name') is used where a single identifier is required, this + * will error. + */ + protected def getIdentifierText(ctx: ParserRuleContext): String = { + val parts = getIdentifierParts(ctx) + if (parts.size > 1) { + ctx match { + case idLitCtx: IdentifierLiteralContext => + val literalValue = string(visitStringLit(idLitCtx.stringLit())) + // Use existing error: IDENTIFIER_TOO_MANY_NAME_PARTS with limit=1 + throw new ParseException( + errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", + messageParameters = Map("identifier" -> literalValue, "limit" -> "1"), + ctx) + case _ => + // This shouldn't happen for regular identifiers in strictIdentifier context + throw new IllegalStateException( + s"Expected single identifier but got qualified name: ${parts.mkString(".")}") + } + } + parts.head + } + + /** + * Create a multi-part identifier. 
Handles identifier-lite with qualified identifiers like + * IDENTIFIER('`cat`.`schema`').table */ override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = withOrigin(ctx) { - ctx.parts.asScala.map(_.getText).toSeq + ctx.parts.asScala.flatMap { part => + // Each part can be an errorCapturingIdentifier, which contains an identifier + // The identifier can be a strictIdentifier which might be an identifierLiteral + val identifierCtx = part.identifier() + if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { + // getIdentifierParts handles both regular identifiers (returns Seq with 1 element) + // and identifier-lite (may return multiple parts if the literal is qualified) + getIdentifierParts(identifierCtx.strictIdentifier()) + } else { + Seq(part.getText) + } + }.toSeq } /** @@ -296,7 +356,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } StructField( - name = colName.getText, + name = getIdentifierText(colName.identifier.strictIdentifier()), dataType = typedVisit[DataType](ctx.dataType), nullable = NULL == null, metadata = builder.build()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c24a7dfd30a6..788f7310ef3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -118,6 +118,25 @@ class AstBuilder extends DataTypeAstBuilder } } + /** + * Override the base getIdentifierParts to properly parse qualified identifiers in + * IDENTIFIER('literal') contexts. Uses CatalystSqlParser to handle qualified identifiers + * like IDENTIFIER('`catalog`.`schema`') which should be parsed as Seq("catalog", "schema"). 
+ */ + override protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { + ctx match { + case idLitCtx: IdentifierLiteralContext => + // For IDENTIFIER('literal'), extract the string literal value and parse it + val literalValue = string(visitStringLit(idLitCtx.stringLit())) + // Parse the string as a multi-part identifier + // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) + CatalystSqlParser.parseMultipartIdentifier(literalValue) + case _ => + // For regular identifiers, just return the text as a single part + Seq(ctx.getText) + } + } + /** * Retrieves the original input text for a given parser context, preserving all whitespace and * formatting. @@ -2544,9 +2563,11 @@ class AstBuilder extends DataTypeAstBuilder /** * Create a Sequence of Strings for an identifier list. + * Note: Each identifier in the list is kept as a single string, even if it's a qualified + * identifier-lite (e.g., IDENTIFIER('a.b') stays as "a.b", not split into parts). */ override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) { - ctx.ident.asScala.map(_.getText).toSeq + ctx.ident.asScala.map(id => getIdentifierText(id.identifier.strictIdentifier())).toSeq } /* ******************************************************************************************** @@ -2554,18 +2575,54 @@ class AstBuilder extends DataTypeAstBuilder * ******************************************************************************************** */ /** * Create a [[TableIdentifier]] from a 'tableName' or 'databaseName'.'tableName' pattern. + * Handles identifier-lite with qualified identifiers. 
*/ override def visitTableIdentifier( ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { - TableIdentifier(ctx.table.getText, Option(ctx.db).map(_.getText)) + // Get the table parts (may be multiple if using qualified identifier-lite) + val tableParts = getIdentifierParts(ctx.table.identifier.strictIdentifier()) + + // Get the database parts if present + val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.identifier.strictIdentifier())) + + // Combine db and table parts + val allParts = dbParts.getOrElse(Seq.empty) ++ tableParts + + // TableIdentifier expects (table, database) where database is optional + // If we have multiple parts, the last is the table, everything before is the database path + allParts match { + case Seq(table) => TableIdentifier(table, None) + case parts if parts.size >= 2 => + TableIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) + case _ => + throw new IllegalStateException(s"Invalid table identifier: ${ctx.getText}") + } } /** * Create a [[FunctionIdentifier]] from a 'functionName' or 'databaseName'.'functionName' pattern. + * Handles identifier-lite with qualified identifiers. 
*/ override def visitFunctionIdentifier( ctx: FunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { - FunctionIdentifier(ctx.function.getText, Option(ctx.db).map(_.getText)) + // Get the function parts (may be multiple if using qualified identifier-lite) + val functionParts = getIdentifierParts(ctx.function.identifier.strictIdentifier()) + + // Get the database parts if present + val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.identifier.strictIdentifier())) + + // Combine db and function parts + val allParts = dbParts.getOrElse(Seq.empty) ++ functionParts + + // FunctionIdentifier expects (function, database) where database is optional + // If we have multiple parts, the last is the function, everything before is the database path + allParts match { + case Seq(function) => FunctionIdentifier(function, None) + case parts if parts.size >= 2 => + FunctionIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) + case _ => + throw new IllegalStateException(s"Invalid function identifier: ${ctx.getText}") + } } /* ******************************************************************************************** @@ -4033,7 +4090,7 @@ class AstBuilder extends DataTypeAstBuilder ctx: ColDefinitionContext): ColumnAndConstraint = withOrigin(ctx) { import ctx._ - val name: String = colName.getText + val name: String = getIdentifierText(colName.identifier.strictIdentifier()) // Check that no duplicates exist among any CREATE TABLE column options specified. var nullable = true var defaultExpression: Option[DefaultExpressionContext] = None @@ -5403,7 +5460,8 @@ class AstBuilder extends DataTypeAstBuilder invalidStatement("ALTER TABLE ... PARTITION ... 
CHANGE COLUMN", ctx) } val columnNameParts = typedVisit[Seq[String]](ctx.colName) - if (!conf.resolver(columnNameParts.last, ctx.colType().colName.getText)) { + if (!conf.resolver(columnNameParts.last, + getIdentifierText(ctx.colType().colName.identifier.strictIdentifier()))) { throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError("Renaming column", "ALTER COLUMN", ctx, Some("please run RENAME COLUMN instead")) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 7d79c5d5d642..ffccff0a9a75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2239,13 +2239,17 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat def identifierTooManyNamePartsError(originalIdentifier: String): Throwable = { new AnalysisException( errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", - messageParameters = Map("identifier" -> toSQLId(originalIdentifier))) + messageParameters = Map( + "identifier" -> toSQLId(originalIdentifier), + "limit" -> "2")) } def identifierTooManyNamePartsError(names: Seq[String]): Throwable = { new AnalysisException( errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", - messageParameters = Map("identifier" -> toSQLId(names))) + messageParameters = Map( + "identifier" -> toSQLId(names), + "limit" -> "2")) } def emptyMultipartIdentifierError(): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 4aa8019097fd..3fc5b23d7299 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -157,6 +157,63 @@ SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM 
IDENTIFIER('T'); WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC'); +-- Identifier-lite: Tests for string literal-only IDENTIFIER() usage +-- These tests verify that IDENTIFIER('literal') works in all identifier positions +-- Note: The difference from tests above is these use ONLY string literals (no expressions/variables) + +-- Identifier-lite in column definitions +CREATE TABLE IDENTIFIER('id_lite_col_test')(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) USING CSV; +INSERT INTO IDENTIFIER('id_lite_col_test') VALUES (1, 'test'); +SELECT IDENTIFIER('col1'), IDENTIFIER('col2') FROM IDENTIFIER('id_lite_col_test'); +DROP TABLE IDENTIFIER('id_lite_col_test'); + +-- Identifier-lite in ALTER TABLE operations +CREATE TABLE IDENTIFIER('id_lite_alter')(c1 INT) USING CSV; +ALTER TABLE IDENTIFIER('id_lite_alter') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'); +ALTER TABLE IDENTIFIER('id_lite_alter') ADD COLUMN IDENTIFIER('c2') INT; +ALTER TABLE IDENTIFIER('id_lite_alter') DROP COLUMN IDENTIFIER('c2'); +ALTER TABLE IDENTIFIER('id_lite_alter') RENAME TO IDENTIFIER('id_lite_renamed'); +DROP TABLE IDENTIFIER('id_lite_renamed'); + +-- Identifier-lite with multiple qualified parts +CREATE SCHEMA identifier_lite_schema; +CREATE TABLE IDENTIFIER('identifier_lite_schema.qualified_test')(c1 INT) USING CSV; +INSERT INTO IDENTIFIER('identifier_lite_schema.qualified_test') VALUES(42); +SELECT * FROM IDENTIFIER('identifier_lite_schema.qualified_test'); +DROP TABLE IDENTIFIER('identifier_lite_schema.qualified_test'); +DROP SCHEMA identifier_lite_schema; + +-- Identifier-lite with qualified identifiers in different positions +CREATE SCHEMA cat1; +CREATE TABLE cat1.tab1(c1 INT) USING CSV; + +-- IDENTIFIER('schema').table syntax +INSERT INTO IDENTIFIER('cat1').tab1 VALUES(1); +SELECT * FROM IDENTIFIER('cat1').tab1; + +-- IDENTIFIER('schema.table') syntax +SELECT * FROM IDENTIFIER('cat1.tab1'); + +-- Mixed: 
IDENTIFIER('schema').IDENTIFIER('table') +SELECT * FROM IDENTIFIER('cat1').IDENTIFIER('tab1'); + +DROP TABLE cat1.tab1; +DROP SCHEMA cat1; + +-- Identifier-lite with backticks in qualified names +CREATE SCHEMA `schema 1`; +CREATE TABLE `schema 1`.`table 1`(c1 INT) USING CSV; + +-- Use identifier-lite with backticked qualified name +INSERT INTO IDENTIFIER('`schema 1`.`table 1`') VALUES(100); +SELECT * FROM IDENTIFIER('`schema 1`.`table 1`'); + +-- Mixed: IDENTIFIER for schema part, regular for table +SELECT * FROM IDENTIFIER('`schema 1`').`table 1`; + +DROP TABLE `schema 1`.`table 1`; +DROP SCHEMA `schema 1`; + -- Not supported SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1); SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')); @@ -173,5 +230,32 @@ INSERT INTO tab(IDENTIFIER('c1')) VALUES(1); CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1); CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV; +-- Identifier-lite: Column definitions should work with string literals +-- (This is a positive test showing identifier-lite works in column definitions) +CREATE TABLE IDENTIFIER('id_lite_coldef_ok')(IDENTIFIER('c1') INT) USING CSV; +DROP TABLE IDENTIFIER('id_lite_coldef_ok'); + +-- Identifier-lite: Error when qualified identifier used in single identifier context +-- This should error because 'col1.col2' is qualified but column name must be single +CREATE TABLE test_qualified_col_error(IDENTIFIER('col1.col2') INT) USING CSV; + +-- This should error because 'schema.table' is qualified but used as column name +CREATE TABLE test_qualified_col_error2(id INT, IDENTIFIER('schema.table') STRING) USING CSV; + +-- Correct way: use backticks to create a single identifier with a dot +CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV; +DROP TABLE test_col_with_dot; + +-- Identifier-lite in column aliases (AS clause) +SELECT 1 AS IDENTIFIER('col1'); +SELECT 'hello' AS 
IDENTIFIER('my_column'); + +-- Identifier-lite in table value constructor with table and column aliases +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')); +SELECT * FROM VALUES (10, 20) AS IDENTIFIER('t')(IDENTIFIER('col_a'), IDENTIFIER('col_b')); +-- Identifier-lite: table alias with qualified name should error (table alias must be single) +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2); +-- Identifier-lite: column alias with qualified name should error (column alias must be single) +SELECT 1 AS IDENTIFIER('col1.col2'); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala index e30b48fdb176..f66eb9df4caf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala @@ -2375,3 +2375,133 @@ class ParametersSuite extends QueryTest with SharedSparkSession { ) } } + +class IdentifierWithParametersSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("IDENTIFIER with parameter - table reference") { + // Test IDENTIFIER with parameters that get substituted before parse + // This tests: parameter substitution -> identifier-lite parse-time resolution + withTable("test_table") { + spark.range(5).write.saveAsTable("test_table") + + checkAnswer( + spark.sql("SELECT * FROM IDENTIFIER(:table_name) ORDER BY id", + Map("table_name" -> "test_table")), + Seq(Row(0), Row(1), Row(2), Row(3), Row(4)) + ) + } + } + + test("IDENTIFIER with parameter - column reference in SELECT list") { + // Test IDENTIFIER with parameters for column names in SELECT list + val df = Seq((1, "a"), (2, "b"), (3, "c")).toDF("col1", "col2") + df.createOrReplaceTempView("test_view") + + checkAnswer( + spark.sql("SELECT IDENTIFIER(:col_name) FROM test_view", + Map("col_name" -> "col1")), + Seq(Row(1), Row(2), Row(3)) + ) + + checkAnswer( 
+ spark.sql("SELECT IDENTIFIER(:c1), IDENTIFIER(:c2) FROM test_view", + Map("c1" -> "col1", "c2" -> "col2")), + Seq(Row(1, "a"), Row(2, "b"), Row(3, "c")) + ) + } + + test("IDENTIFIER with parameter - qualified table name") { + // Test IDENTIFIER with parameters for qualified identifiers + withTable("test_qualified") { + spark.sql("CREATE TABLE test_qualified (c1 INT) USING parquet") + spark.sql("INSERT INTO test_qualified VALUES (42)") + + checkAnswer( + spark.sql("SELECT * FROM IDENTIFIER(:qual_table)", + Map("qual_table" -> "default.test_qualified")), + Seq(Row(42)) + ) + } + } + + test("IDENTIFIER with parameter - backticked identifier with spaces") { + // Test IDENTIFIER with parameters for backticked identifiers + val df = Seq((1, 2)).toDF("col 1", "col 2") + df.createOrReplaceTempView("test_view2") + + checkAnswer( + spark.sql("SELECT IDENTIFIER(:col_name) FROM test_view2", + Map("col_name" -> "`col 1`")), + Seq(Row(1)) + ) + } + + test("IDENTIFIER with parameter - DDL statements") { + // Test IDENTIFIER with parameters in CREATE/DROP TABLE statements + val tableName = "param_table" + withTable(tableName) { + spark.sql("CREATE TABLE IDENTIFIER(:tbl) (c1 INT) USING parquet", + Map("tbl" -> tableName)) + spark.sql("INSERT INTO IDENTIFIER(:tbl) VALUES (100)", + Map("tbl" -> tableName)) + + checkAnswer( + spark.sql("SELECT * FROM IDENTIFIER(:tbl)", Map("tbl" -> tableName)), + Seq(Row(100)) + ) + + spark.sql("DROP TABLE IDENTIFIER(:tbl)", Map("tbl" -> tableName)) + } + } + + test("IDENTIFIER with parameter - function names") { + // Test IDENTIFIER with parameters for function references + checkAnswer( + spark.sql("SELECT IDENTIFIER(:func_name)(-5)", Map("func_name" -> "abs")), + Seq(Row(5)) + ) + + checkAnswer( + spark.sql("SELECT IDENTIFIER(:func)('hello')", Map("func" -> "upper")), + Seq(Row("HELLO")) + ) + } + + test("IDENTIFIER with parameter - column reference in WHERE clause") { + // Test IDENTIFIER with parameters in WHERE clause + val df = Seq((1, "a"), 
(2, "b"), (3, "c")).toDF("col1", "col2") + df.createOrReplaceTempView("test_view3") + + checkAnswer( + spark.sql("SELECT * FROM test_view3 WHERE IDENTIFIER(:col) > 1", + Map("col" -> "col1")), + Seq(Row(2, "b"), Row(3, "c")) + ) + } + + test("IDENTIFIER with parameter - column reference in GROUP BY") { + // Test IDENTIFIER with parameters in GROUP BY clause + val df = Seq((1, "a"), (1, "b"), (2, "c")).toDF("col1", "col2") + df.createOrReplaceTempView("test_view4") + + checkAnswer( + spark.sql( + "SELECT IDENTIFIER(:col), COUNT(*) FROM test_view4 GROUP BY IDENTIFIER(:col)", + Map("col" -> "col1")), + Seq(Row(1, 2), Row(2, 1)) + ) + } + + test("IDENTIFIER with parameter - column reference in ORDER BY") { + // Test IDENTIFIER with parameters in ORDER BY clause + val df = Seq((3, "a"), (1, "b"), (2, "c")).toDF("col1", "col2") + df.createOrReplaceTempView("test_view5") + + checkAnswer( + spark.sql("SELECT * FROM test_view5 ORDER BY IDENTIFIER(:col)", + Map("col" -> "col1")), + Seq(Row(1, "b"), Row(2, "c"), Row(3, "a")) + ) + } +} From 7b0da8479e16b53eab9ba62180466499048606d9 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 28 Oct 2025 15:26:25 -0700 Subject: [PATCH 02/37] fix testcases --- .../sql/catalyst/parser/AstBuilder.scala | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 788f7310ef3c..9e9f72f51a52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2580,10 +2580,22 @@ class AstBuilder extends DataTypeAstBuilder override def visitTableIdentifier( ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { // Get the table parts (may be multiple if using qualified identifier-lite) - val tableParts = 
getIdentifierParts(ctx.table.identifier.strictIdentifier()) + // Handle null case for error recovery + val tableParts = if (ctx.table != null && ctx.table.identifier != null && + ctx.table.identifier.strictIdentifier() != null) { + getIdentifierParts(ctx.table.identifier.strictIdentifier()) + } else { + Seq(ctx.table.getText) + } // Get the database parts if present - val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.identifier.strictIdentifier())) + val dbParts = Option(ctx.db).flatMap { db => + if (db.identifier != null && db.identifier.strictIdentifier() != null) { + Some(getIdentifierParts(db.identifier.strictIdentifier())) + } else { + Some(Seq(db.getText)) + } + } // Combine db and table parts val allParts = dbParts.getOrElse(Seq.empty) ++ tableParts @@ -2606,10 +2618,22 @@ class AstBuilder extends DataTypeAstBuilder override def visitFunctionIdentifier( ctx: FunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { // Get the function parts (may be multiple if using qualified identifier-lite) - val functionParts = getIdentifierParts(ctx.function.identifier.strictIdentifier()) + // Handle null case for error recovery + val functionParts = if (ctx.function != null && ctx.function.identifier != null && + ctx.function.identifier.strictIdentifier() != null) { + getIdentifierParts(ctx.function.identifier.strictIdentifier()) + } else { + Seq(ctx.function.getText) + } // Get the database parts if present - val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.identifier.strictIdentifier())) + val dbParts = Option(ctx.db).flatMap { db => + if (db.identifier != null && db.identifier.strictIdentifier() != null) { + Some(getIdentifierParts(db.identifier.strictIdentifier())) + } else { + Some(Seq(db.getText)) + } + } // Combine db and function parts val allParts = dbParts.getOrElse(Seq.empty) ++ functionParts From c3a12e02730374e79dd06a33caa5e9c7001f3ed0 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 4 Nov 2025 12:09:04 -0800 Subject: 
[PATCH 03/37] Lots of fixes --- .../sql/catalyst/parser/SqlBaseParser.g4 | 3 +- .../catalyst/parser/DataTypeAstBuilder.scala | 107 ++++-- .../sql/catalyst/parser/AstBuilder.scala | 82 +++-- .../spark/sql/execution/SparkSqlParser.scala | 3 +- .../identifier-clause.sql.out | 335 +++++++++++++----- .../sql-tests/inputs/identifier-clause.sql | 99 +----- .../results/identifier-clause.sql.out | 317 ++++++++++++----- 7 files changed, 631 insertions(+), 315 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index df1a0249cf16..d445ce6f0d22 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1585,7 +1585,8 @@ qualifiedName // replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise // valid expressions such as "a-b" can be recognized as an identifier errorCapturingIdentifier - : identifier errorCapturingIdentifierExtra + : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase + | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra ; // extra left-factoring grammar diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 39636557e387..a9af0ed4f85a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -170,12 +170,34 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { */ protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { ctx match { + case idCtx: IdentifierContext => + // identifier can 
be either strictIdentifier or strictNonReserved + // Recursively process the strictIdentifier + if (idCtx.strictIdentifier() != null) { + getIdentifierParts(idCtx.strictIdentifier()) + } else { + Seq(ctx.getText) + } case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal'), extract the string literal value and parse it + // For IDENTIFIER('literal') in strictIdentifier + val literalValue = string(visitStringLit(idLitCtx.stringLit())) + // This base implementation just returns the literal as a single part + // Subclasses should override to parse qualified identifiers + Seq(literalValue) + case idLitCtx: IdentifierLiteralWithExtraContext => + // For IDENTIFIER('literal') in errorCapturingIdentifier val literalValue = string(visitStringLit(idLitCtx.stringLit())) // This base implementation just returns the literal as a single part // Subclasses should override to parse qualified identifiers Seq(literalValue) + case base: ErrorCapturingIdentifierBaseContext => + // Regular identifier with errorCapturingIdentifierExtra + // Need to recursively handle identifier which might itself be IDENTIFIER('literal') + if (base.identifier() != null && base.identifier().strictIdentifier() != null) { + getIdentifierParts(base.identifier().strictIdentifier()) + } else { + Seq(ctx.getText) + } case _ => // For regular identifiers, just return the text as a single part Seq(ctx.getText) @@ -191,23 +213,51 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { protected def getIdentifierText(ctx: ParserRuleContext): String = { val parts = getIdentifierParts(ctx) if (parts.size > 1) { - ctx match { - case idLitCtx: IdentifierLiteralContext => - val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Use existing error: IDENTIFIER_TOO_MANY_NAME_PARTS with limit=1 - throw new ParseException( - errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", - messageParameters = Map("identifier" -> literalValue, "limit" -> "1"), - ctx) - case _ => - // This 
shouldn't happen for regular identifiers in strictIdentifier context - throw new IllegalStateException( - s"Expected single identifier but got qualified name: ${parts.mkString(".")}") + // Try to find the original IDENTIFIER('literal') context for better error messages + val literalValue = extractIdentifierLiteral(ctx) + if (literalValue.isDefined) { + throw new ParseException( + errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", + messageParameters = Map("identifier" -> literalValue.get, "limit" -> "1"), + ctx) + } else { + // Regular qualified identifier without IDENTIFIER() + throw new IllegalStateException( + s"Expected single identifier but got qualified name: ${parts.mkString(".")}") } } parts.head } + /** + * Extract the string literal value from IDENTIFIER('literal') if present in the context tree. + * Returns None if this is not an IDENTIFIER('literal') construct. + */ + private def extractIdentifierLiteral(ctx: ParserRuleContext): Option[String] = { + ctx match { + case idLitCtx: IdentifierLiteralContext => + Some(string(visitStringLit(idLitCtx.stringLit()))) + case idLitCtx: IdentifierLiteralWithExtraContext => + Some(string(visitStringLit(idLitCtx.stringLit()))) + case idCtx: IdentifierContext => + // Recurse into strictIdentifier + if (idCtx.strictIdentifier() != null) { + extractIdentifierLiteral(idCtx.strictIdentifier()) + } else { + None + } + case base: ErrorCapturingIdentifierBaseContext => + // Recurse into identifier + if (base.identifier() != null && base.identifier().strictIdentifier() != null) { + extractIdentifierLiteral(base.identifier().strictIdentifier()) + } else { + None + } + case _ => + None + } + } + /** * Create a multi-part identifier. 
Handles identifier-lite with qualified identifiers like * IDENTIFIER('`cat`.`schema`').table @@ -215,15 +265,26 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = withOrigin(ctx) { ctx.parts.asScala.flatMap { part => - // Each part can be an errorCapturingIdentifier, which contains an identifier - // The identifier can be a strictIdentifier which might be an identifierLiteral - val identifierCtx = part.identifier() - if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { - // getIdentifierParts handles both regular identifiers (returns Seq with 1 element) - // and identifier-lite (may return multiple parts if the literal is qualified) - getIdentifierParts(identifierCtx.strictIdentifier()) - } else { - Seq(part.getText) + // Each part is an errorCapturingIdentifier, which can be either: + // 1. identifier errorCapturingIdentifierExtra (regular path) - labeled as + // #errorCapturingIdentifierBase + // 2. 
IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra + // (identifier-lite path) - labeled as #identifierLiteralWithExtra + part match { + case idLitWithExtra: IdentifierLiteralWithExtraContext => + // This is identifier-lite: IDENTIFIER('string') + getIdentifierParts(idLitWithExtra) + case base: ErrorCapturingIdentifierBaseContext => + // Regular identifier path + val identifierCtx = base.identifier() + if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { + getIdentifierParts(identifierCtx.strictIdentifier()) + } else { + Seq(part.getText) + } + case _ => + // Fallback for other cases + Seq(part.getText) } }.toSeq } @@ -411,7 +472,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } StructField( - name = getIdentifierText(colName.identifier.strictIdentifier()), + name = getIdentifierText(colName), dataType = typedVisit[DataType](ctx.dataType), nullable = NULL == null, metadata = builder.build()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ba36ae07575d..97061eb0422e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -125,12 +125,34 @@ class AstBuilder extends DataTypeAstBuilder */ override protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { ctx match { + case idCtx: IdentifierContext => + // identifier can be either strictIdentifier or strictNonReserved + // Recursively process the strictIdentifier + if (idCtx.strictIdentifier() != null) { + getIdentifierParts(idCtx.strictIdentifier()) + } else { + Seq(ctx.getText) + } case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal'), extract the string literal value and parse it + // For IDENTIFIER('literal') in strictIdentifier + val literalValue = 
string(visitStringLit(idLitCtx.stringLit())) + // Parse the string as a multi-part identifier + // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) + CatalystSqlParser.parseMultipartIdentifier(literalValue) + case idLitCtx: IdentifierLiteralWithExtraContext => + // For IDENTIFIER('literal') in errorCapturingIdentifier val literalValue = string(visitStringLit(idLitCtx.stringLit())) // Parse the string as a multi-part identifier // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) CatalystSqlParser.parseMultipartIdentifier(literalValue) + case base: ErrorCapturingIdentifierBaseContext => + // Regular identifier with errorCapturingIdentifierExtra + // Need to recursively handle identifier which might itself be IDENTIFIER('literal') + if (base.identifier() != null && base.identifier().strictIdentifier() != null) { + getIdentifierParts(base.identifier().strictIdentifier()) + } else { + Seq(ctx.getText) + } case _ => // For regular identifiers, just return the text as a single part Seq(ctx.getText) @@ -816,7 +838,8 @@ class AstBuilder extends DataTypeAstBuilder (columnAliases, plan) => UnresolvedSubqueryColumnAliases(visitIdentifierList(columnAliases), plan) ) - SubqueryAlias(ctx.name.getText, subQuery) + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + SubqueryAlias(getIdentifierText(ctx.name), subQuery) } /** @@ -1805,7 +1828,8 @@ class AstBuilder extends DataTypeAstBuilder // Collect all window specifications defined in the WINDOW clause. val baseWindowTuples = ctx.namedWindow.asScala.map { wCtx => - (wCtx.name.getText, typedVisit[WindowSpec](wCtx.windowSpec)) + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + (getIdentifierText(wCtx.name), typedVisit[WindowSpec](wCtx.windowSpec)) } baseWindowTuples.groupBy(_._1).foreach { kv => if (kv._2.size > 1) { @@ -2533,7 +2557,8 @@ class AstBuilder extends DataTypeAstBuilder * Create an alias ([[SubqueryAlias]]) for a [[LogicalPlan]]. 
*/ private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = { - SubqueryAlias(alias.getText, plan) + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + SubqueryAlias(getIdentifierText(alias), plan) } /** @@ -2567,7 +2592,7 @@ class AstBuilder extends DataTypeAstBuilder * identifier-lite (e.g., IDENTIFIER('a.b') stays as "a.b", not split into parts). */ override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) { - ctx.ident.asScala.map(id => getIdentifierText(id.identifier.strictIdentifier())).toSeq + ctx.ident.asScala.map(id => getIdentifierText(id)).toSeq } /* ******************************************************************************************** @@ -2581,20 +2606,15 @@ class AstBuilder extends DataTypeAstBuilder ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { // Get the table parts (may be multiple if using qualified identifier-lite) // Handle null case for error recovery - val tableParts = if (ctx.table != null && ctx.table.identifier != null && - ctx.table.identifier.strictIdentifier() != null) { - getIdentifierParts(ctx.table.identifier.strictIdentifier()) + val tableParts = if (ctx.table != null) { + getIdentifierParts(ctx.table) } else { - Seq(ctx.table.getText) + Seq("") } // Get the database parts if present - val dbParts = Option(ctx.db).flatMap { db => - if (db.identifier != null && db.identifier.strictIdentifier() != null) { - Some(getIdentifierParts(db.identifier.strictIdentifier())) - } else { - Some(Seq(db.getText)) - } + val dbParts = Option(ctx.db).map { db => + getIdentifierParts(db) } // Combine db and table parts @@ -2619,20 +2639,15 @@ class AstBuilder extends DataTypeAstBuilder ctx: FunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { // Get the function parts (may be multiple if using qualified identifier-lite) // Handle null case for error recovery - val functionParts = if (ctx.function != null && 
ctx.function.identifier != null && - ctx.function.identifier.strictIdentifier() != null) { - getIdentifierParts(ctx.function.identifier.strictIdentifier()) + val functionParts = if (ctx.function != null) { + getIdentifierParts(ctx.function) } else { - Seq(ctx.function.getText) + Seq("") } // Get the database parts if present - val dbParts = Option(ctx.db).flatMap { db => - if (db.identifier != null && db.identifier.strictIdentifier() != null) { - Some(getIdentifierParts(db.identifier.strictIdentifier())) - } else { - Some(Seq(db.getText)) - } + val dbParts = Option(ctx.db).map { db => + getIdentifierParts(db) } // Combine db and function parts @@ -2720,7 +2735,8 @@ class AstBuilder extends DataTypeAstBuilder override def visitNamedExpression(ctx: NamedExpressionContext): Expression = withOrigin(ctx) { val e = expression(ctx.expression) if (ctx.name != null) { - Alias(e, ctx.name.getText)() + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + Alias(e, getIdentifierText(ctx.name))() } else if (ctx.identifierList != null) { MultiAlias(e, visitIdentifierList(ctx.identifierList)) } else { @@ -3298,7 +3314,8 @@ class AstBuilder extends DataTypeAstBuilder * Create a reference to a window frame, i.e. [[WindowSpecReference]]. */ override def visitWindowRef(ctx: WindowRefContext): WindowSpecReference = withOrigin(ctx) { - WindowSpecReference(ctx.name.getText) + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + WindowSpecReference(getIdentifierText(ctx.name)) } /** @@ -3434,10 +3451,13 @@ class AstBuilder extends DataTypeAstBuilder * it can be [[UnresolvedExtractValue]]. 
*/ override def visitDereference(ctx: DereferenceContext): Expression = withOrigin(ctx) { - val attr = ctx.fieldName.getText + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + val attr = getIdentifierText(ctx.fieldName) expression(ctx.base) match { case unresolved_attr @ UnresolvedAttribute(nameParts) => - ctx.fieldName.getStart.getText match { + // For regex check, we need the original text before identifier-lite resolution + val originalText = ctx.fieldName.getStart.getText + originalText match { case escapedIdentifier(columnNameRegex) if conf.supportQuotedRegexColumnName && isRegex(columnNameRegex) && canApplyRegex(ctx) => @@ -4127,7 +4147,7 @@ class AstBuilder extends DataTypeAstBuilder ctx: ColDefinitionContext): ColumnAndConstraint = withOrigin(ctx) { import ctx._ - val name: String = getIdentifierText(colName.identifier.strictIdentifier()) + val name: String = getIdentifierText(colName) // Check that no duplicates exist among any CREATE TABLE column options specified. 
var nullable = true var defaultExpression: Option[DefaultExpressionContext] = None @@ -5542,7 +5562,7 @@ class AstBuilder extends DataTypeAstBuilder } val columnNameParts = typedVisit[Seq[String]](ctx.colName) if (!conf.resolver(columnNameParts.last, - getIdentifierText(ctx.colType().colName.identifier.strictIdentifier()))) { + getIdentifierText(ctx.colType().colName))) { throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError("Renaming column", "ALTER COLUMN", ctx, Some("please run RENAME COLUMN instead")) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 58bffbed3e69..ef00f30fbdda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -629,7 +629,8 @@ class SparkSqlAstBuilder extends AstBuilder { val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl => icl.identifierComment.asScala.map { ic => - ic.identifier.getText -> Option(ic.commentSpec()).map(visitCommentSpec) + // Use getIdentifierText to handle both regular identifiers and IDENTIFIER('literal') + getIdentifierText(ic.identifier) -> Option(ic.commentSpec()).map(visitCommentSpec) } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 13d911c98838..fefe200dc617 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -574,7 +574,14 @@ CreateVariable defaultvalueexpression(sometable, 'sometable'), false -- !query CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`sometable`, false 
+org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +{ + "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", + "sqlState" : "42P07", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`sometable`" + } +} -- !query @@ -604,8 +611,15 @@ SetVariable [variablereference(system.session.var='c1')] -- !query DROP TABLE IDENTIFIER(var || 'table') -- !query analysis -DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.sometable +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'var'", + "hint" : "" + } +} -- !query @@ -853,7 +867,8 @@ org.apache.spark.sql.AnalysisException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "`a`.`b`.`c`.`d`" + "identifier" : "`a`.`b`.`c`.`d`", + "limit" : "2" }, "queryContext" : [ { "objectType" : "", @@ -1064,27 +1079,32 @@ SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "''x.win''", - "hint" : "" - } + "identifier" : "x.win", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 44, + "fragment" : "IDENTIFIER('x.win')" + } ] } -- !query SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'('", - "hint" : "" - } -} +Project [c1#x] ++- Project [c1#x] + +- Join Inner, (c1#x = c1#x) + :- SubqueryAlias T1 + : +- LocalRelation [c1#x] + +- SubqueryAlias T2 + +- LocalRelation [c1#x] -- !query 
@@ -1111,40 +1131,28 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "''a''", - "hint" : "" - } -} +Project [map(a, 1)[a] AS map(a, 1)[a]#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] -- !query SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "''a''", - "hint" : "" - } -} +Project [named_struct(a, 1).a AS named_struct(a, 1).a#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] -- !query SELECT * FROM s.IDENTIFIER('tab') -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME", - "sqlState" : "42000", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "funcName" : "`s`.`IDENTIFIER`" + "relationName" : "`s`.`tab`" }, "queryContext" : [ { "objectType" : "", @@ -1159,110 +1167,267 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "error" : "'.'", - "hint" : "" - } + "relationName" : "`s`.`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 47, + "fragment" : "IDENTIFIER('s').IDENTIFIER('tab')" + } ] } -- !query SELECT * FROM 
IDENTIFIER('s').tab -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "error" : "'.'", - "hint" : "" - } + "relationName" : "`s`.`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 33, + "fragment" : "IDENTIFIER('s').tab" + } ] } -- !query SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +Project [row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] ++- Project [c1#x, row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [row_number() windowspecdefinition(c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [c1#x ASC NULLS FIRST] + +- Project [c1#x] + +- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) +-- !query analysis +Project [row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] ++- Project [c1#x, row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, row_number() OVER (ORDER BY c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [row_number() windowspecdefinition(c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number() OVER (ORDER BY 
c1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [c1#x ASC NULLS FIRST] + +- Project [c1#x] + +- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT 1 AS IDENTIFIER('col1') +-- !query analysis +Project [1 AS col1#x] ++- OneRowRelation + + +-- !query +SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias my_table + +- LocalRelation [c1#x, c2#x] + + +-- !query +WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias v +: +- Project [col1#x AS c1#x] +: +- LocalRelation [col1#x] ++- Project [c1#x] + +- SubqueryAlias v + +- CTERelationRef xxxx, true, [c1#x], false, false, 1 + + +-- !query +CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(c1,None)], VALUES(1), false, true, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x] + + +-- !query +SELECT c1 FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(col1#x as int) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false + + +-- !query +INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/tab, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/tab], Append, `spark_catalog`.`default`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/tab), [c1] ++- Project [c1#x AS c1#x] + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT c1 FROM tab +-- !query analysis +Project 
[c1#x] ++- SubqueryAlias spark_catalog.default.tab + +- Relation spark_catalog.default.tab[c1#x] csv + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') +-- !query analysis +org.apache.spark.sql.AnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "error" : "''win''", - "hint" : "" + "operation" : "RENAME COLUMN", + "tableName" : "`spark_catalog`.`default`.`tab`" } } -- !query -SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) +SELECT col1 FROM tab -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", "messageParameters" : { - "error" : "'WINDOW'", - "hint" : "" - } + "objectName" : "`col1`", + "proposal" : "`c1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 11, + "fragment" : "col1" + } ] } -- !query -WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +AlterTableAddColumnsCommand `spark_catalog`.`default`.`tab`, [StructField(c2,IntegerType,true)] + + +-- !query +SELECT c2 FROM tab +-- !query analysis +Project [c2#x] ++- SubqueryAlias spark_catalog.default.tab + +- Relation spark_catalog.default.tab[c1#x,c2#x] csv + + +-- !query +ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') +-- !query analysis +org.apache.spark.sql.AnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - 
"error" : "''v''", - "hint" : "" + "operation" : "DROP COLUMN", + "tableName" : "`spark_catalog`.`default`.`tab`" } } -- !query -INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", + "sqlState" : "42P07", "messageParameters" : { - "error" : "'('", - "hint" : ": missing ')'" + "relationName" : "`default`.`tab_renamed`" } } -- !query -CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +SELECT * FROM tab_renamed +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.tab_renamed + +- Relation spark_catalog.default.tab_renamed[c1#x,c2#x] csv + + +-- !query +CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_col_with_dot`, false + + +-- !query +DROP TABLE IF EXISTS test_col_with_dot +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_col_with_dot + + +-- !query +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "'('", - "hint" : "" - } + "identifier" : "schema.table", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 65, + "fragment" : "VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)" + } ] } -- !query -CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +SELECT 1 AS IDENTIFIER('col1.col2') -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { - 
"errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "'('", - "hint" : "" - } + "identifier" : "col1.col2", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "1 AS IDENTIFIER('col1.col2')" + } ] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 3fc5b23d7299..5bfa1da6a848 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -157,64 +157,6 @@ SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T'); WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC'); --- Identifier-lite: Tests for string literal-only IDENTIFIER() usage --- These tests verify that IDENTIFIER('literal') works in all identifier positions --- Note: The difference from tests above is these use ONLY string literals (no expressions/variables) - --- Identifier-lite in column definitions -CREATE TABLE IDENTIFIER('id_lite_col_test')(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) USING CSV; -INSERT INTO IDENTIFIER('id_lite_col_test') VALUES (1, 'test'); -SELECT IDENTIFIER('col1'), IDENTIFIER('col2') FROM IDENTIFIER('id_lite_col_test'); -DROP TABLE IDENTIFIER('id_lite_col_test'); - --- Identifier-lite in ALTER TABLE operations -CREATE TABLE IDENTIFIER('id_lite_alter')(c1 INT) USING CSV; -ALTER TABLE IDENTIFIER('id_lite_alter') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'); -ALTER TABLE IDENTIFIER('id_lite_alter') ADD COLUMN IDENTIFIER('c2') INT; -ALTER TABLE IDENTIFIER('id_lite_alter') DROP COLUMN IDENTIFIER('c2'); -ALTER TABLE IDENTIFIER('id_lite_alter') RENAME TO IDENTIFIER('id_lite_renamed'); -DROP TABLE IDENTIFIER('id_lite_renamed'); - --- 
Identifier-lite with multiple qualified parts -CREATE SCHEMA identifier_lite_schema; -CREATE TABLE IDENTIFIER('identifier_lite_schema.qualified_test')(c1 INT) USING CSV; -INSERT INTO IDENTIFIER('identifier_lite_schema.qualified_test') VALUES(42); -SELECT * FROM IDENTIFIER('identifier_lite_schema.qualified_test'); -DROP TABLE IDENTIFIER('identifier_lite_schema.qualified_test'); -DROP SCHEMA identifier_lite_schema; - --- Identifier-lite with qualified identifiers in different positions -CREATE SCHEMA cat1; -CREATE TABLE cat1.tab1(c1 INT) USING CSV; - --- IDENTIFIER('schema').table syntax -INSERT INTO IDENTIFIER('cat1').tab1 VALUES(1); -SELECT * FROM IDENTIFIER('cat1').tab1; - --- IDENTIFIER('schema.table') syntax -SELECT * FROM IDENTIFIER('cat1.tab1'); - --- Mixed: IDENTIFIER('schema').IDENTIFIER('table') -SELECT * FROM IDENTIFIER('cat1').IDENTIFIER('tab1'); - -DROP TABLE cat1.tab1; -DROP SCHEMA cat1; - --- Identifier-lite with backticks in qualified names -CREATE SCHEMA `schema 1`; -CREATE TABLE `schema 1`.`table 1`(c1 INT) USING CSV; - --- Use identifier-lite with backticked qualified name -INSERT INTO IDENTIFIER('`schema 1`.`table 1`') VALUES(100); -SELECT * FROM IDENTIFIER('`schema 1`.`table 1`'); - --- Mixed: IDENTIFIER for schema part, regular for table -SELECT * FROM IDENTIFIER('`schema 1`').`table 1`; - -DROP TABLE `schema 1`.`table 1`; -DROP SCHEMA `schema 1`; - --- Not supported SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1); SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')); SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1); @@ -225,37 +167,26 @@ SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab'); SELECT * FROM IDENTIFIER('s').tab; SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1); SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1); +SELECT 1 AS IDENTIFIER('col1'); +SELECT my_table.* 
FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'));
 WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v);
-INSERT INTO tab(IDENTIFIER('c1')) VALUES(1);
 CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1);
+SELECT c1 FROM v;
 CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV;
-
--- Identifier-lite: Column definitions should work with string literals
--- (This is a positive test showing identifier-lite works in column definitions)
-CREATE TABLE IDENTIFIER('id_lite_coldef_ok')(IDENTIFIER('c1') INT) USING CSV;
-DROP TABLE IDENTIFIER('id_lite_coldef_ok');
-
--- Identifier-lite: Error when qualified identifier used in single identifier context
--- This should error because 'col1.col2' is qualified but column name must be single
-CREATE TABLE test_qualified_col_error(IDENTIFIER('col1.col2') INT) USING CSV;
-
--- This should error because 'schema.table' is qualified but used as column name
-CREATE TABLE test_qualified_col_error2(id INT, IDENTIFIER('schema.table') STRING) USING CSV;
-
--- Correct way: use backticks to create a single identifier with a dot
+INSERT INTO tab(IDENTIFIER('c1')) VALUES(1);
+SELECT c1 FROM tab;
+ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1');
+SELECT col1 FROM tab;
+ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT;
+SELECT c2 FROM tab;
+ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2');
+ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed');
+SELECT * FROM tab_renamed;
+
+-- Succeeds: backticks make `col.with.dot` a single identifier (not a qualified name)
 CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV;
-DROP TABLE test_col_with_dot;
-
--- Identifier-lite in column aliases (AS clause)
-SELECT 1 AS IDENTIFIER('col1');
-SELECT 'hello' AS IDENTIFIER('my_column');
-
--- Identifier-lite in table value constructor with table and column aliases
-SELECT * FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'));
-SELECT 
* FROM VALUES (10, 20) AS IDENTIFIER('t')(IDENTIFIER('col_a'), IDENTIFIER('col_b')); - +DROP TABLE IF EXISTS test_col_with_dot; -- Identifier-lite: table alias with qualified name should error (table alias must be single) SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2); - -- Identifier-lite: column alias with qualified name should error (column alias must be single) SELECT 1 AS IDENTIFIER('col1.col2'); diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 596745b4ba5d..b646bd2e8cf5 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -704,7 +704,15 @@ DROP TABLE IDENTIFIER(var || 'table') -- !query schema struct<> -- !query output - +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'var'", + "hint" : "" + } +} -- !query @@ -980,7 +988,8 @@ org.apache.spark.sql.AnalysisException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "`a`.`b`.`c`.`d`" + "identifier" : "`a`.`b`.`c`.`d`", + "limit" : "2" }, "queryContext" : [ { "objectType" : "", @@ -1175,29 +1184,28 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "''x.win''", - "hint" : "" - } + "identifier" : "x.win", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 44, + "fragment" : "IDENTIFIER('x.win')" + } ] } -- !query SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) -- !query schema -struct<> +struct -- !query output 
-org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'('", - "hint" : "" - } -} +1 -- !query @@ -1226,33 +1234,17 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "''a''", - "hint" : "" - } -} +1 -- !query SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "''a''", - "hint" : "" - } -} +1 -- !query @@ -1260,12 +1252,12 @@ SELECT * FROM s.IDENTIFIER('tab') -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME", - "sqlState" : "42000", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "funcName" : "`s`.`IDENTIFIER`" + "relationName" : "`s`.`tab`" }, "queryContext" : [ { "objectType" : "", @@ -1282,14 +1274,20 @@ SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "error" : "'.'", - "hint" : "" - } + "relationName" : "`s`.`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 47, + "fragment" 
: "IDENTIFIER('s').IDENTIFIER('tab')" + } ] } @@ -1298,108 +1296,247 @@ SELECT * FROM IDENTIFIER('s').tab -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", "messageParameters" : { - "error" : "'.'", - "hint" : "" - } + "relationName" : "`s`.`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 33, + "fragment" : "IDENTIFIER('s').tab" + } ] } -- !query SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "''win''", - "hint" : "" - } -} +1 -- !query SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) -- !query schema +struct +-- !query output +1 + + +-- !query +SELECT 1 AS IDENTIFIER('col1') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) +-- !query schema +struct +-- !query output +1 2 + + +-- !query +WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +-- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException + + + +-- !query +SELECT c1 FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +-- !query schema +struct<> +-- !query 
output + + + +-- !query +SELECT c1 FROM tab +-- !query schema +struct +-- !query output +1 + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "error" : "'WINDOW'", - "hint" : "" + "operation" : "RENAME COLUMN", + "tableName" : "`spark_catalog`.`default`.`tab`" } } -- !query -WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +SELECT col1 FROM tab -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.catalyst.ExtendedAnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", "messageParameters" : { - "error" : "''v''", - "hint" : "" - } + "objectName" : "`col1`", + "proposal" : "`c1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 11, + "fragment" : "col1" + } ] } -- !query -INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException + + + +-- !query +SELECT c2 FROM tab +-- !query schema +struct +-- !query output +NULL + + +-- !query +ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException { - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "error" : "'('", - "hint" : ": missing ')'" + "operation" : "DROP COLUMN", + "tableName" : "`spark_catalog`.`default`.`tab`" } } -- !query 
-CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM tab_renamed +-- !query schema +struct +-- !query output +1 NULL + + +-- !query +CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS test_col_with_dot +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) -- !query schema struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "'('", - "hint" : "" - } + "identifier" : "schema.table", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 65, + "fragment" : "VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)" + } ] } -- !query -CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +SELECT 1 AS IDENTIFIER('col1.col2') -- !query schema struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "PARSE_SYNTAX_ERROR", + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "error" : "'('", - "hint" : "" - } + "identifier" : "col1.col2", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "1 AS IDENTIFIER('col1.col2')" + } ] } From 2ee8073b6609a477c79cec94258296f6dcf5693d Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 4 Nov 2025 17:30:06 -0800 Subject: [PATCH 04/37] More fixes --- .../sql/catalyst/parser/SqlBaseParser.g4 | 4 +- .../catalyst/parser/DataTypeAstBuilder.scala | 26 + .../identifier-clause.sql.out | 933 ++++++++++++++- 
.../sql-tests/inputs/identifier-clause.sql | 201 ++++ .../results/identifier-clause.sql.out | 1049 ++++++++++++++++- .../errors/QueryCompilationErrorsSuite.scala | 4 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 2 +- 7 files changed, 2188 insertions(+), 31 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index d445ce6f0d22..e627e616174e 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -679,8 +679,8 @@ dmlStatementNoWith ; identifierReference - : multipartIdentifier - | IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + | multipartIdentifier ; catalogIdentifierReference diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index a9af0ed4f85a..1440fad4ee35 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -35,6 +35,32 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, /** * AST builder for parsing data type definitions and table schemas. * + * == CRITICAL: Extracting Identifier Names == + * + * When extracting identifier names from parser contexts, you MUST use the helper methods + * provided by this class instead of calling ctx.getText() directly: + * + * - '''getIdentifierText(ctx)''': For single identifiers (column names, aliases, window names) + * - '''getIdentifierParts(ctx)''': For qualified identifiers (table names, schema.table) + * + * '''DO NOT use ctx.getText() or ctx.identifier.getText()''' directly! 
These methods do not + * handle the IDENTIFIER('literal') syntax and will cause incorrect behavior. + * + * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse + * time (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), + * you'll get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature. + * + * Example: + * {{{ + * // WRONG - does not handle IDENTIFIER('literal'): + * val name = ctx.identifier.getText + * SubqueryAlias(ctx.name.getText, plan) + * + * // CORRECT - handles both regular identifiers and IDENTIFIER('literal'): + * val name = getIdentifierText(ctx.identifier) + * SubqueryAlias(getIdentifierText(ctx.name), plan) + * }}} + * * This is a client-side parser designed specifically for parsing data type strings (e.g., "INT", * "STRUCT") and table schemas. It assumes that the input does not contain * parameter markers (`:name` or `?`), as parameter substitution should occur before data types diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index fefe200dc617..3c795a14c102 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -574,14 +574,7 @@ CreateVariable defaultvalueexpression(sometable, 'sometable'), false -- !query CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV -- !query analysis -org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException -{ - "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", - "sqlState" : "42P07", - "messageParameters" : { - "relationName" : "`spark_catalog`.`default`.`sometable`" - } -} +CreateDataSourceTableCommand `spark_catalog`.`default`.`sometable`, false -- !query @@ -611,15 +604,8 @@ SetVariable [variablereference(system.session.var='c1')] -- !query DROP TABLE IDENTIFIER(var 
|| 'table') -- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'var'", - "hint" : "" - } -} +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.sometable -- !query @@ -1431,3 +1417,916 @@ org.apache.spark.sql.catalyst.parser.ParseException "fragment" : "1 AS IDENTIFIER('col1.col2')" } ] } + + +-- !query +CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_show`, false + + +-- !query +SHOW VIEWS IN IDENTIFIER('default') +-- !query analysis +ShowViewsCommand default, [namespace#x, viewName#x, isTemporary#x] + + +-- !query +SHOW PARTITIONS IDENTIFIER('test_show') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "`spark_catalog`.`default`.`test_show`" + } +} + + +-- !query +SHOW CREATE TABLE IDENTIFIER('test_show') +-- !query analysis +ShowCreateTable false, [createtab_stmt#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), default.test_show, V1Table(default.test_show), [c1#x, c2#x] + + +-- !query +DROP TABLE test_show +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_show + + +-- !query +CREATE TABLE test_desc(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_desc`, false + + +-- !query +DESCRIBE TABLE IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +DESCRIBE FORMATTED IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DESCRIBE EXTENDED 
IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DESC IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE test_desc +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_desc + + +-- !query +CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_comment`, false + + +-- !query +COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' +-- !query analysis +CommentOnTable table comment ++- ResolvedTable V2SessionCatalog(spark_catalog), default.test_comment, V1Table(default.test_comment), [c1#x, c2#x] + + +-- !query +ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' +-- !query analysis +AlterTableChangeColumnCommand `spark_catalog`.`default`.`test_comment`, c1, StructField(c1,IntegerType,true) + + +-- !query +DROP TABLE test_comment +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_comment + + +-- !query +CREATE SCHEMA test_schema +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] + + +-- !query +CREATE TABLE test_schema.test_table(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`test_schema`.`test_table`, false + + +-- !query +ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +-- !query analysis +AnalyzeTableCommand `spark_catalog`.`test_schema`.`test_table`, false + + +-- !query +REFRESH TABLE IDENTIFIER('test_schema.test_table') +-- !query analysis +RefreshTableCommand `spark_catalog`.`test_schema`.`test_table` + + +-- !query +DESCRIBE IDENTIFIER('test_schema.test_table') +-- !query 
analysis +DescribeTableCommand `spark_catalog`.`test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +-- !query analysis +ShowColumnsCommand `spark_catalog`.`test_schema`.`test_table`, [col_name#x] + + +-- !query +DROP TABLE IDENTIFIER('test_schema.test_table') +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), test_schema.test_table + + +-- !query +DROP SCHEMA test_schema +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] + + +-- !query +DECLARE IDENTIFIER('my_var') = 'value' +-- !query analysis +CreateVariable defaultvalueexpression(value, 'value'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.my_var + + +-- !query +SET VAR IDENTIFIER('my_var') = 'new_value' +-- !query analysis +SetVariable [variablereference(system.session.my_var='value')] ++- Project [new_value AS my_var#x] + +- OneRowRelation + + +-- !query +SELECT IDENTIFIER('my_var') +-- !query analysis +Project [variablereference(system.session.my_var='new_value') AS variablereference(system.session.my_var='new_value')#x] ++- OneRowRelation + + +-- !query +DROP TEMPORARY VARIABLE IDENTIFIER('my_var') +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.my_var + + +-- !query +CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2')) +-- !query analysis +CreateSQLFunctionCommand test_udf, IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING, INT, IDENTIFIER('param1') + length(IDENTIFIER('param2')), false, true, false, false + + +-- !query +SELECT test_udf(5, 'hello') +-- !query analysis +Project [test_udf(param1#x, param2#x) AS test_udf(5, hello)#x] ++- Project 
[cast(5 as int) AS param1#x, cast(hello as string) AS param2#x] + +- OneRowRelation + + +-- !query +DROP TEMPORARY FUNCTION test_udf +-- !query analysis +DropFunctionCommand test_udf, false, true + + +-- !query +CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result' +-- !query analysis +CreateSQLFunctionCommand test_table_udf, IDENTIFIER('input_val') INT, IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING, SELECT IDENTIFIER('input_val'), 'result', true, true, false, false + + +-- !query +SELECT * FROM test_table_udf(42) +-- !query analysis +Project [col1#x, col2#x] ++- SQLFunctionNode test_table_udf + +- SubqueryAlias test_table_udf + +- Project [cast(input_val#x as int) AS col1#x, cast(result#x as string) AS col2#x] + +- Project [cast(42 as int) AS input_val#x, result AS result#x] + +- OneRowRelation + + +-- !query +DROP TEMPORARY FUNCTION test_table_udf +-- !query analysis +DropFunctionCommand test_table_udf, false, true + + +-- !query +BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +LEAVE IDENTIFIER('loop_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'LEAVE'", + "hint" : "" + } +} + + +-- !query +END LOOP loop_label +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : 
"42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +SELECT x +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "x" + } ] +} + + +-- !query +END IDENTIFIER('block_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END WHILE while_label +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", 
+ "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('counter') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`counter`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "IDENTIFIER('counter')" + } ] +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'repeat_label'", + "hint" : "" + } +} + + +-- !query +UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'UNTIL'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('cnt') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`cnt`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('cnt')" 
+ } ] +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT row.c1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +END FOR IDENTIFIER('for_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXTRACT_BASE_FIELD_TYPE", + "sqlState" : "42000", + "messageParameters" : { + "base" : "\"variablereference(system.session.tab='T')\"", + "other" : "\"STRING\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1 +-- !query analysis +Project [c1#x.c2 AS c1.c2#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test`, false + + +-- !query +INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in 
comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] ++- Project [col1#x AS c1#x, col2#x AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val +-- !query analysis +Project [c1#x, c2#x] ++- Filter (c1#x = 1) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test2`, false + + +-- !query +INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test2], Append, `spark_catalog`.`default`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test2), [c1, c3] ++- 
Project [col1#x AS c1#x, col2#x AS c3#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col +-- !query analysis +Project [c1#x, c2#x, c1#x, c3#x] ++- Project [c1#x, c2#x, c3#x, c1#x] + +- Join Inner, (c1#x = c1#x) + :- SubqueryAlias t1 + : +- SubqueryAlias spark_catalog.default.integration_test + : +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias t2 + +- SubqueryAlias spark_catalog.default.integration_test2 + +- Relation spark_catalog.default.integration_test2[c1#x,c3#x] csv + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord +-- !query analysis +Project [c1#x, c2#x, rn#x] ++- Project [c1#x, c2#x, rn#x, rn#x] + +- Window [row_number() windowspecdefinition(c2#x, c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#x], [c2#x], [c1#x ASC NULLS FIRST] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col +-- !query analysis +Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2 +-- !query analysis +Sort [c1#x DESC NULLS LAST, c2#x ASC 
NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2 +-- !query analysis +CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] + +- InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] + +- Project [c1#x AS c1#x, c2#x AS c2#x] + +- Project [col1#x AS c1#x, col2#x AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "default.my_table", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 115, + "stopIndex" : 144, + "fragment" : "IDENTIFIER(:schema '.' 
:table)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias my_cte +: +- Project [col1#x AS c1#x] +: +- LocalRelation [col1#x] ++- Project [c1#x] + +- SubqueryAlias my_cte + +- CTERelationRef xxxx, true, [c1#x], false, false, 1 + + +-- !query +EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name +-- !query analysis +CommandResult Execute CreateViewCommand + +- CreateViewCommand `test_view`, [(test_col,None)], VALUES(1), false, true, LocalTempView, UNSUPPORTED, true + +- LocalRelation [col1#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view +-- !query analysis +Project [test_col#x] ++- SubqueryAlias test_view + +- View (`test_view`, [test_col#x]) + +- Project [cast(col1#x as int) AS test_col#x] + +- LocalRelation [col1#x] + + +-- !query +DROP VIEW test_view +-- !query analysis +DropTempViewCommand test_view + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col +-- !query analysis +CommandResult Execute AlterTableAddColumnsCommand + +- AlterTableAddColumnsCommand `spark_catalog`.`default`.`integration_test`, [StructField(c4,IntegerType,true)] + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "RENAME COLUMN", + "tableName" : "`spark_catalog`.`default`.`integration_test`" + } 
+} + + +-- !query +EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val +-- !query analysis +Project [map(mykey, 42)[mykey] AS result#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`t`.`c1`", + "proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`, `IDENTIFIER('t')`.`c4`" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "IDENTIFIER(:alias '.c1')" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab +-- !query analysis +Sort [c1#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x,c4#x] csv + + +-- !query +EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'''", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER(:var_name) AS result +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "var_name" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 27, + "fragment" : 
":var_name" + } ] +} + + +-- !query +END' + USING 'my_variable' AS var_name, 100 AS var_value +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias +-- !query analysis +Project [c1#x, c2#x, c4#x] ++- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x,c4#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "default.col1", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 40, + "fragment" : "1 AS IDENTIFIER(:schema '.' 
:col)" + } ] +} + + +-- !query +DROP TABLE integration_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test + + +-- !query +DROP TABLE integration_test2 +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test2 diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 5bfa1da6a848..221025587de3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -190,3 +190,204 @@ DROP TABLE IF EXISTS test_col_with_dot; SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2); -- Identifier-lite: column alias with qualified name should error (column alias must be single) SELECT 1 AS IDENTIFIER('col1.col2'); + +-- Additional coverage: SHOW commands with identifier-lite +CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV; +SHOW VIEWS IN IDENTIFIER('default'); +SHOW PARTITIONS IDENTIFIER('test_show'); +SHOW CREATE TABLE IDENTIFIER('test_show'); +DROP TABLE test_show; + +-- SET CATALOG with identifier-lite +-- SET CATALOG IDENTIFIER('spark_catalog'); + +-- DESCRIBE with different forms +CREATE TABLE test_desc(c1 INT) USING CSV; +DESCRIBE TABLE IDENTIFIER('test_desc'); +DESCRIBE FORMATTED IDENTIFIER('test_desc'); +DESCRIBE EXTENDED IDENTIFIER('test_desc'); +DESC IDENTIFIER('test_desc'); +DROP TABLE test_desc; + +-- COMMENT ON COLUMN with identifier-lite +CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV; +COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment'; +ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment'; +DROP TABLE test_comment; + +-- Additional identifier-lite tests with qualified table names in various commands +CREATE SCHEMA test_schema; +CREATE TABLE test_schema.test_table(c1 INT) 
USING CSV; +ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS; +REFRESH TABLE IDENTIFIER('test_schema.test_table'); +DESCRIBE IDENTIFIER('test_schema.test_table'); +SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'); +DROP TABLE IDENTIFIER('test_schema.test_table'); +DROP SCHEMA test_schema; + +-- Session variables with identifier-lite +DECLARE IDENTIFIER('my_var') = 'value'; +SET VAR IDENTIFIER('my_var') = 'new_value'; +SELECT IDENTIFIER('my_var'); +DROP TEMPORARY VARIABLE IDENTIFIER('my_var'); + +-- SQL UDF with identifier-lite in parameter names and return statement +CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2')); + +SELECT test_udf(5, 'hello'); +DROP TEMPORARY FUNCTION test_udf; + +-- SQL UDF with table return type using identifier-lite +CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result'; + +SELECT * FROM test_table_udf(42); +DROP TEMPORARY FUNCTION test_table_udf; + +-- SQL Script labels with identifier-lite +BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1; + LEAVE IDENTIFIER('loop_label'); + END LOOP loop_label; +END; + +-- SQL Script with labeled BEGIN/END block +BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1; + SELECT x; + END IDENTIFIER('block_label'); +END; + +-- WHILE loop with identifier-lite label +BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0; + IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1; + END WHILE while_label; + SELECT IDENTIFIER('counter'); +END; + +-- REPEAT loop with identifier-lite label +BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0; + repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1; + UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label'); + SELECT 
IDENTIFIER('cnt'); +END; + +-- FOR loop with identifier-lite +BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT row.c1; + END FOR IDENTIFIER('for_label'); +END; + +-- Integration tests: Combining parameter markers, string coalescing, and IDENTIFIER +-- These tests demonstrate the power of combining IDENTIFIER with parameters + +-- Test 1: IDENTIFIER with parameter marker for table name +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab; + +-- Test 2: IDENTIFIER with string coalescing for column name +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1; + +-- Test 3: IDENTIFIER with parameter and string literal coalescing for qualified table name +CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV; +INSERT INTO integration_test VALUES (1, 'a'), (2, 'b'); +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table; + +-- Test 4: IDENTIFIER in column reference with parameter and string coalescing +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix; + +-- Test 5: IDENTIFIER in WHERE clause with parameters +EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val; + +-- Test 6: IDENTIFIER in JOIN with parameters for table and column names +CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV; +INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y'); +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col; + +-- Test 7: IDENTIFIER in window function with parameter for partition column +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY 
IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord; + +-- Test 8: IDENTIFIER in aggregate function with string coalescing +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col; + +-- Test 9: IDENTIFIER in ORDER BY with multiple parameters +EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2; + +-- Test 10: IDENTIFIER in INSERT with parameter for column name +EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2; + +-- Test 11: Complex - IDENTIFIER with nested string operations +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias; + +-- Test 12: IDENTIFIER in CTE name with parameter +EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name; + +-- Test 13: IDENTIFIER in view name with parameter +EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name; +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view; +DROP VIEW test_view; + +-- Test 14: IDENTIFIER in ALTER TABLE with parameters +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col; +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO 
IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col; + +-- Test 15: IDENTIFIER with dereference using parameters +EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val; + +-- Test 16: IDENTIFIER in table alias with string coalescing +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias; + +-- Test 17: Multiple IDENTIFIER clauses with different parameter combinations +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab; + +-- Test 18: IDENTIFIER in DECLARE and SELECT within EXECUTE IMMEDIATE +EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value; + SELECT IDENTIFIER(:var_name) AS result; + END' + USING 'my_variable' AS var_name, 100 AS var_value; + +-- Test 19: IDENTIFIER with qualified name coalescing for schema.table.column pattern +-- This should work for multi-part identifiers +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias; + +-- Test 20: Error case - IDENTIFIER with too many parts from parameter coalescing +-- This should error as column alias must be single identifier +EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col; + +-- Cleanup +DROP TABLE integration_test; +DROP TABLE integration_test2; diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index b646bd2e8cf5..387b4e04af4a 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -704,15 +704,7 @@ DROP TABLE IDENTIFIER(var || 'table') -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'var'", - "hint" : "" - } -} + -- !query @@ -1540,3 +1532,1042 @@ org.apache.spark.sql.catalyst.parser.ParseException "fragment" : "1 AS IDENTIFIER('col1.col2')" } ] } + + +-- !query +CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW VIEWS IN IDENTIFIER('default') +-- !query schema +struct +-- !query output +v + + +-- !query +SHOW PARTITIONS IDENTIFIER('test_show') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "`spark_catalog`.`default`.`test_show`" + } +} + + +-- !query +SHOW CREATE TABLE IDENTIFIER('test_show') +-- !query schema +struct +-- !query output +CREATE TABLE spark_catalog.default.test_show ( + c1 INT, + c2 STRING) +USING CSV + + +-- !query +DROP TABLE test_show +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_desc(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE TABLE IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +DESCRIBE FORMATTED IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table test_desc +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider CSV +Location [not included in comparison]/{warehouse_dir}/test_desc + + +-- !query +DESCRIBE EXTENDED 
IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table test_desc +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider CSV +Location [not included in comparison]/{warehouse_dir}/test_desc + + +-- !query +DESC IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +DROP TABLE test_desc +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE test_comment +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_schema.test_table(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +-- !query schema +struct<> +-- !query output + + + +-- !query +REFRESH TABLE IDENTIFIER('test_schema.test_table') +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE IDENTIFIER('test_schema.test_table') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +-- !query schema +struct +-- !query output +c1 + + +-- !query +DROP TABLE IDENTIFIER('test_schema.test_table') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE IDENTIFIER('my_var') = 'value' +-- 
!query schema +struct<> +-- !query output + + + +-- !query +SET VAR IDENTIFIER('my_var') = 'new_value' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT IDENTIFIER('my_var') +-- !query schema +struct +-- !query output +new_value + + +-- !query +DROP TEMPORARY VARIABLE IDENTIFIER('my_var') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2')) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT test_udf(5, 'hello') +-- !query schema +struct +-- !query output +10 + + +-- !query +DROP TEMPORARY FUNCTION test_udf +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM test_table_udf(42) +-- !query schema +struct +-- !query output +42 result + + +-- !query +DROP TEMPORARY FUNCTION test_table_udf +-- !query schema +struct<> +-- !query output + + + +-- !query +BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +LEAVE IDENTIFIER('loop_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'LEAVE'", + "hint" : "" + } +} + + +-- !query +END LOOP loop_label +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + 
"sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +SELECT x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "x" + } ] +} + + +-- !query +END IDENTIFIER('block_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter 
= IDENTIFIER('counter') + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END WHILE while_label +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('counter') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`counter`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "IDENTIFIER('counter')" + } ] +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'repeat_label'", + "hint" : "" + } +} + + +-- !query +UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label') +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'UNTIL'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('cnt') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`cnt`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('cnt')" + } ] +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT row.c1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +END FOR IDENTIFIER('for_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : 
"INVALID_EXTRACT_BASE_FIELD_TYPE", + "sqlState" : "42000", + "messageParameters" : { + "base" : "\"variablereference(system.session.tab='T')\"", + "other" : "\"STRING\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1 +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val +-- !query schema +struct +-- !query output +1 a + + +-- !query +CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col +-- !query schema +struct +-- !query output +1 a 1 x +2 b 2 y + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord +-- !query schema +struct +-- !query output +1 a 1 +2 b 1 + + +-- !query 
+EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col +-- !query schema +struct +-- !query output +a 1 +b 1 + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2 +-- !query schema +struct +-- !query output +2 b +1 a + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2 +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "default.my_table", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 115, + "stopIndex" : 144, + "fragment" : "IDENTIFIER(:schema '.' 
:table)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name +-- !query schema +struct +-- !query output +1 + + +-- !query +EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view +-- !query schema +struct +-- !query output +1 + + +-- !query +DROP VIEW test_view +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "RENAME COLUMN", + "tableName" : "`spark_catalog`.`default`.`integration_test`" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val +-- !query schema +struct +-- !query output +42 + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`t`.`c1`", + "proposal" : "`IDENTIFIER('t')`.`c1`, 
`IDENTIFIER('t')`.`c2`, `IDENTIFIER('t')`.`c4`" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 31, + "fragment" : "IDENTIFIER(:alias '.c1')" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab +-- !query schema +struct +-- !query output +1 a +2 b +3 c + + +-- !query +EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'''", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER(:var_name) AS result +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "var_name" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 27, + "fragment" : ":var_name" + } ] +} + + +-- !query +END' + USING 'my_variable' AS var_name, 100 AS var_value +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias +-- !query schema +struct +-- !query output +1 a NULL +2 b NULL +3 c NULL + + +-- !query +EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS 
schema, 'col1' AS col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "default.col1", + "limit" : "1" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 40, + "fragment" : "1 AS IDENTIFIER(:schema '.' :col)" + } ] +} + + +-- !query +DROP TABLE integration_test +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE integration_test2 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 9e16c5b2a3dd..45a656f7a6d5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -720,7 +720,7 @@ class QueryCompilationErrorsSuite }, condition = "IDENTIFIER_TOO_MANY_NAME_PARTS", sqlState = "42601", - parameters = Map("identifier" -> "`db_name`.`schema_name`.`view_name`") + parameters = Map("identifier" -> "`db_name`.`schema_name`.`view_name`", "limit" -> "2") ) } @@ -741,7 +741,7 @@ class QueryCompilationErrorsSuite }, condition = "IDENTIFIER_TOO_MANY_NAME_PARTS", sqlState = "42601", - parameters = Map("identifier" -> "`db_name`.`schema_name`.`new_table_name`") + parameters = Map("identifier" -> "`db_name`.`schema_name`.`new_table_name`", "limit" -> "2") ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index c6a48d98fa98..fd88559d4f98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ 
-3078,7 +3078,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel }, condition = "IDENTIFIER_TOO_MANY_NAME_PARTS", sqlState = "42601", - parameters = Map("identifier" -> "`db_name`.`schema_name`.`function_name`") + parameters = Map("identifier" -> "`db_name`.`schema_name`.`function_name`", "limit" -> "2") ) } finally { JdbcDialects.unregisterDialect(testH2Dialect) From ebe421aafc1acca5addc6950fb9f955a70546558 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 09:05:17 -0800 Subject: [PATCH 05/37] Introduce SQLConf --- .../sql/catalyst/parser/SqlBaseParser.g4 | 13 +- .../spark/sql/catalyst/parser/parsers.scala | 1 + .../spark/sql/internal/SqlApiConf.scala | 2 + .../sql/catalyst/parser/AstBuilder.scala | 9 +- .../apache/spark/sql/internal/SQLConf.scala | 14 + .../identifier-clause-legacy.sql.out | 2566 +++++++++++++++ .../inputs/identifier-clause-legacy.sql | 2 + .../results/identifier-clause-legacy.sql.out | 2891 +++++++++++++++++ 8 files changed, 5493 insertions(+), 5 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/identifier-clause-legacy.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index e627e616174e..b302439ef6fb 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -46,6 +46,13 @@ options { tokenVocab = SqlBaseLexer; } * When true, parameter markers are allowed everywhere a literal is supported.
*/ public boolean parameter_substitution_enabled = true; + + /** + * When false (default), IDENTIFIER('literal') is resolved to an identifier at parse time (identifier-lite). + * When true, only the legacy IDENTIFIER(expression) function syntax is allowed. + * Controlled by spark.sql.legacy.identifierClause configuration. + */ + public boolean legacy_identifier_clause_only = false; } compoundOrSingleStatement @@ -1586,7 +1593,7 @@ qualifiedName // valid expressions such as "a-b" can be recognized as an identifier errorCapturingIdentifier : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase - | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra + | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra ; // extra left-factoring grammar @@ -1603,7 +1610,7 @@ identifier strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative - | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral + | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier ; @@ -1719,7 +1726,7 @@ singleStringLitWithoutMarker singleStringLit : singleStringLitWithoutMarker | parameterMarker - ; +; parameterMarker : {parameter_substitution_enabled}? 
namedParameterMarker #namedParameterMarkerRule diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala index e2e320be3654..281124306a50 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala @@ -476,6 +476,7 @@ object AbstractParser extends Logging { parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords parser.double_quoted_identifiers = conf.doubleQuotedIdentifiers parser.parameter_substitution_enabled = !conf.legacyParameterSubstitutionConstantsOnly + parser.legacy_identifier_clause_only = conf.legacyIdentifierClauseOnly } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index f715f8f9ed8c..fd2a2c0ee5b2 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -51,6 +51,7 @@ private[sql] trait SqlApiConf { def parserDfaCacheFlushThreshold: Int def parserDfaCacheFlushRatio: Double def legacyParameterSubstitutionConstantsOnly: Boolean + def legacyIdentifierClauseOnly: Boolean } private[sql] object SqlApiConf { @@ -104,4 +105,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def parserDfaCacheFlushThreshold: Int = -1 override def parserDfaCacheFlushRatio: Double = -1.0 override def legacyParameterSubstitutionConstantsOnly: Boolean = false + override def legacyIdentifierClauseOnly: Boolean = false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 97061eb0422e..773f95109009 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3494,13 +3494,18 @@ class AstBuilder extends DataTypeAstBuilder * quoted in `` */ override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) { - ctx.getStart.getText match { + // For regex check, we need the original text before identifier-lite resolution + val originalText = ctx.getStart.getText + originalText match { case escapedIdentifier(columnNameRegex) if conf.supportQuotedRegexColumnName && isRegex(columnNameRegex) && canApplyRegex(ctx) => UnresolvedRegex(columnNameRegex, None, conf.caseSensitiveAnalysis) case _ => - UnresolvedAttribute.quoted(ctx.getText) + // Use getIdentifierParts to handle IDENTIFIER('literal') correctly + // This allows IDENTIFIER('t').c1 to work like t.c1 + val parts = getIdentifierParts(ctx.identifier()) + UnresolvedAttribute(parts) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d88cbe326cfb..f3e0ced77009 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4946,6 +4946,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_IDENTIFIER_CLAUSE_ONLY = + buildConf("spark.sql.legacy.identifierClause") + .internal() + .doc("When set to false, IDENTIFIER('literal') is resolved to an identifier at parse time " + + "anywhere identifiers can occur. 
When set to true, only the legacy " + + "IDENTIFIER(constantExpr) clause is allowed, which evaluates the expression at analysis " + + "and is limited to a narrow subset of scenarios.") + .version("4.1.0") + .booleanConf + .createWithDefault(false) + val LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED = buildConf("spark.sql.legacy.allowNegativeScaleOfDecimal") .internal() @@ -7609,6 +7620,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf { override def legacyParameterSubstitutionConstantsOnly: Boolean = getConf(SQLConf.LEGACY_PARAMETER_SUBSTITUTION_CONSTANTS_ONLY) + override def legacyIdentifierClauseOnly: Boolean = + getConf(SQLConf.LEGACY_IDENTIFIER_CLAUSE_ONLY) + def streamStatePollingInterval: Long = getConf(SQLConf.PIPELINES_STREAM_STATE_POLLING_INTERVAL) def watchdogMinRetryTimeInSeconds: Long = { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out new file mode 100644 index 000000000000..bfdaa6f3a864 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -0,0 +1,2566 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET hivevar:colname = 'c' +-- !query analysis +SetCommand (hivevar:colname,Some('c')) + + +-- !query +SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1) +-- !query analysis +Project [c_1#x] ++- SubqueryAlias T + +- LocalRelation [c_1#x] + + +-- !query +SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) +-- !query analysis +Project [c1#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) +-- !query analysis +Project [c1#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) +-- !query analysis +Project [c1#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT IDENTIFIER('`c 
1`') FROM VALUES(1) AS T(`c 1`) +-- !query analysis +Project [c 1#x] ++- SubqueryAlias T + +- LocalRelation [c 1#x] + + +-- !query +SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) +-- !query analysis +Project [#x] ++- SubqueryAlias T + +- LocalRelation [#x] + + +-- !query +SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1) +-- !query analysis +Project [c1#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +CREATE SCHEMA IF NOT EXISTS s +-- !query analysis +CreateNamespace true ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [s] + + +-- !query +CREATE TABLE s.tab(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`s`.`tab`, false + + +-- !query +USE SCHEMA s +-- !query analysis +SetNamespaceCommand [s] + + +-- !query +INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/s.db/tab, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/s.db/tab], Append, `spark_catalog`.`s`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/s.db/tab), [c1] ++- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "DELETE", + "tableName" : "`spark_catalog`.`s`.`tab`" + } +} + + +-- !query +UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2 +-- !query analysis +org.apache.spark.SparkUnsupportedOperationException +{ + "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "messageParameters" : { + "ddl" : "UPDATE TABLE" + } +} + + +-- !query +MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 + WHEN MATCHED THEN UPDATE SET c1 = 3 +-- !query analysis 
+org.apache.spark.SparkUnsupportedOperationException +{ + "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "messageParameters" : { + "ddl" : "MERGE INTO TABLE" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('tab') +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.s.tab + +- Relation spark_catalog.s.tab[c1#x] csv + + +-- !query +SELECT * FROM IDENTIFIER('s.tab') +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.s.tab + +- Relation spark_catalog.s.tab[c1#x] csv + + +-- !query +SELECT * FROM IDENTIFIER('`s`.`tab`') +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.s.tab + +- Relation spark_catalog.s.tab[c1#x] csv + + +-- !query +SELECT * FROM IDENTIFIER('t' || 'a' || 'b') +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.s.tab + +- Relation spark_catalog.s.tab[c1#x] csv + + +-- !query +USE SCHEMA default +-- !query analysis +SetNamespaceCommand [default] + + +-- !query +DROP TABLE s.tab +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), s.tab + + +-- !query +DROP SCHEMA s +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [s] + + +-- !query +SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1) +-- !query analysis +Project [coalesce(cast(null as int), 1) AS coalesce(NULL, 1)#x] ++- OneRowRelation + + +-- !query +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query analysis +Project [abs(c1#x) AS abs(c1)#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1) +-- !query analysis +Project [id#xL] ++- Range (0, 1, step=1) + + +-- !query +CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b') +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab + + 
+-- !query +CREATE SCHEMA identifier_clauses +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clauses] + + +-- !query +USE identifier_clauses +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clauses] + + +-- !query +CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clauses`.`tab`, false + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab') +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clauses.tab + + +-- !query +CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clauses`.`tab`, false + + +-- !query +REPLACE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "REPLACE TABLE", + "tableName" : "`spark_catalog`.`identifier_clauses`.`tab`" + } +} + + +-- !query +CACHE TABLE IDENTIFIER('ta' || 'b') +-- !query analysis +CacheTable [tab], false, true + +- SubqueryAlias spark_catalog.identifier_clauses.tab + +- Relation spark_catalog.identifier_clauses.tab[c1#x] csv + + +-- !query +UNCACHE TABLE IDENTIFIER('ta' || 'b') +-- !query analysis +UncacheTable false, true + +- SubqueryAlias spark_catalog.identifier_clauses.tab + +- Relation spark_catalog.identifier_clauses.tab[c1#x] csv + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b') +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clauses.tab + + +-- !query +USE default +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), 
[default] + + +-- !query +DROP SCHEMA identifier_clauses +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clauses] + + +-- !query +CREATE TABLE tab(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`tab`, false + + +-- !query +INSERT INTO tab VALUES (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/tab, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/tab], Append, `spark_catalog`.`default`.`tab`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/tab), [c1] ++- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT c1 FROM tab +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.tab + +- Relation spark_catalog.default.tab[c1#x] csv + + +-- !query +DESCRIBE IDENTIFIER('ta' || 'b') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`tab`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS +-- !query analysis +AnalyzeTableCommand `spark_catalog`.`default`.`tab`, false + + +-- !query +ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT +-- !query analysis +AlterTableAddColumnsCommand `spark_catalog`.`default`.`tab`, [StructField(c2,IntegerType,true)] + + +-- !query +SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b') +-- !query analysis +ShowTableProperties [key#x, value#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), default.tab, V1Table(default.tab), [c1#x, c2#x] + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('ta' || 'b') +-- !query analysis +ShowColumnsCommand `spark_catalog`.`default`.`tab`, [col_name#x] + + +-- !query +COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello' +-- !query analysis +CommentOnTable hello ++- ResolvedTable V2SessionCatalog(spark_catalog), default.tab, V1Table(default.tab), 
[c1#x, c2#x] + + +-- !query +REFRESH TABLE IDENTIFIER('ta' || 'b') +-- !query analysis +RefreshTableCommand `spark_catalog`.`default`.`tab` + + +-- !query +REPAIR TABLE IDENTIFIER('ta' || 'b') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_PARTITIONED_TABLE", + "sqlState" : "42809", + "messageParameters" : { + "operation" : "MSCK REPAIR TABLE", + "tableIdentWithDB" : "`spark_catalog`.`default`.`tab`" + } +} + + +-- !query +TRUNCATE TABLE IDENTIFIER('ta' || 'b') +-- !query analysis +TruncateTableCommand `spark_catalog`.`default`.`tab` + + +-- !query +DROP TABLE IF EXISTS tab +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab + + +-- !query +CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(c1,None)], VALUES(1), false, true, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(col1#x as int) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +ALTER VIEW IDENTIFIER('v') AS VALUES(2) +-- !query analysis +AlterViewAsCommand `spark_catalog`.`default`.`v`, VALUES(2), true + +- LocalRelation [col1#x] + + +-- !query +DROP VIEW IDENTIFIER('v') +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, false, true, false + + +-- !query +CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) +-- !query analysis +CreateViewCommand `v`, [(c1,None)], VALUES(1), false, false, LocalTempView, UNSUPPORTED, true + +- LocalRelation [col1#x] + + +-- !query +DROP VIEW IDENTIFIER('v') +-- !query analysis +DropTempViewCommand v + + +-- !query +CREATE SCHEMA IDENTIFIER('id' || 'ent') +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +ALTER SCHEMA 
IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue') +-- !query analysis +SetNamespaceProperties [somekey=somevalue] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 'someloc' +-- !query analysis +SetNamespaceLocation someloc ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment' +-- !query analysis +CommentOnNamespace some comment ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +DESCRIBE SCHEMA IDENTIFIER('id' || 'ent') +-- !query analysis +DescribeNamespace false, [info_name#x, info_value#x] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +SHOW TABLES IN IDENTIFIER('id' || 'ent') +-- !query analysis +ShowTables [namespace#x, tableName#x, isTemporary#x] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello' +-- !query analysis +ShowTablesCommand ident, hello, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +USE IDENTIFIER('id' || 'ent') +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +SHOW CURRENT SCHEMA +-- !query analysis +ShowCurrentNamespaceCommand + + +-- !query +USE SCHEMA IDENTIFIER('id' || 'ent') +-- !query analysis +SetNamespaceCommand [ident] + + +-- !query +USE SCHEMA default +-- !query analysis +SetNamespaceCommand [default] + + +-- !query +DROP SCHEMA IDENTIFIER('id' || 'ent') +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +CREATE SCHEMA ident +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +CREATE FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query analysis +CreateFunctionCommand spark_catalog.ident.myDoubleAvg, test.org.apache.spark.sql.MyDoubleAvg, false, false, false + + +-- !query +DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') +-- !query analysis +DescribeFunctionCommand org.apache.spark.sql.catalyst.expressions.ExpressionInfo@xxxxxxxx, false + + +-- !query +REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') +-- !query analysis +RefreshFunctionCommand ident, mydoubleavg + + +-- !query +DROP FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') +-- !query analysis +DropFunctionCommand spark_catalog.ident.mydoubleavg, false, false + + +-- !query +DROP SCHEMA ident +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [ident] + + +-- !query +CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query analysis +CreateFunctionCommand myDoubleAvg, test.org.apache.spark.sql.MyDoubleAvg, true, false, false + + +-- !query +DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') +-- !query analysis +DropFunctionCommand myDoubleAvg, false, true + + +-- !query +DECLARE var = 'sometable' +-- !query analysis +CreateVariable defaultvalueexpression(sometable, 'sometable'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var + + +-- !query +CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`sometable`, false + + +-- !query +SET VAR var = 'c1' +-- !query analysis +SetVariable [variablereference(system.session.var='sometable')] ++- Project [c1 AS var#x] + +- OneRowRelation + + +-- !query +SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) +-- !query analysis +Project [c1#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +SET VAR var = 'some' +-- !query analysis +SetVariable 
[variablereference(system.session.var='c1')] ++- Project [some AS var#x] + +- OneRowRelation + + +-- !query +DROP TABLE IDENTIFIER(var || 'table') +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.sometable + + +-- !query +SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'1'", + "hint" : ": extra input '1'" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('c 1')" + } ] +} + + +-- !query +SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_EMPTY_STATEMENT", + "sqlState" : "42617", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 21, + "fragment" : "IDENTIFIER('')" + } ] +} + + +-- !query +VALUES(IDENTIFIER(CAST(NULL AS STRING))) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.NULL", + "sqlState" : "42601", + "messageParameters" : { + "expr" : "CAST(NULL AS STRING)", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 38, + "fragment" : "CAST(NULL AS STRING)" + } ] +} + + +-- !query +VALUES(IDENTIFIER(1)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 19, + "fragment" : "1" + } ] +} + + +-- !query +VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) +-- !query analysis 
+org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.NOT_CONSTANT", + "sqlState" : "42601", + "messageParameters" : { + "expr" : "substr('HELLO', 1, CAST((rand() + CAST(1 AS DOUBLE)) AS INT))", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 48, + "fragment" : "SUBSTR('HELLO', 1, RAND() + 1)" + } ] +} + + +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + +-- !query +CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 25, + "stopIndex" : 25, + "fragment" : "1" + } ] +} + + +-- !query +CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +DROP TABLE IDENTIFIER('a.b.c') +-- 
!query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +DROP VIEW IDENTIFIER('a.b.c') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`.`c`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +VALUES(IDENTIFIER(1)()) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 19, + "fragment" : "1" + } ] +} + + +-- !query +VALUES(IDENTIFIER('a.b.c.d')()) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "`a`.`b`.`c`.`d`", + "limit" : "2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 30, + "fragment" : "IDENTIFIER('a.b.c.d')()" + } ] +} + + +-- !query +CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE", + "sqlState" : "42000", + "messageParameters" : { + "database" : "`default`" + 
}, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 108, + "fragment" : "CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg'" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.MULTI_PART_NAME", + "sqlState" : "42000", + "messageParameters" : { + "name" : "`default`.`myDoubleAvg`", + "statement" : "DROP TEMPORARY FUNCTION" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 63, + "fragment" : "DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg')" + } ] +} + + +-- !query +CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS", + "sqlState" : "428EK", + "messageParameters" : { + "actualName" : "`default`.`v`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 62, + "fragment" : "CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1)" + } ] +} + + +-- !query +create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CreateViewCommand `v1`, (select my_col from (values (1), (2), (1) as (my_col)) group by 1), false, false, LocalTempView, UNSUPPORTED, true + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CacheTableAsSelect t1, (select my_col from (values (1), (2), (1) as (my_col)) group by 1), false, true + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias 
__auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t2`, ErrorIfExists, [my_col] + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [my_col] ++- Project [my_col#x AS my_col#x] + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +drop view v1 +-- !query analysis +DropTempViewCommand v1 + + +-- !query +drop table t1 +-- !query analysis +DropTempViewCommand t1 + + +-- !query +drop table t2 +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +DECLARE agg = 'max' +-- !query analysis +CreateVariable defaultvalueexpression(max, 'max'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.agg + + +-- !query +DECLARE col = 'c1' +-- !query analysis +CreateVariable defaultvalueexpression(c1, 'c1'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.col + + +-- !query +DECLARE tab = 'T' +-- !query analysis +CreateVariable defaultvalueexpression(T, 'T'), false ++- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.tab + + +-- !query +WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab) +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias S +: +- Project [col1#x AS c1#x, col2#x AS c2#x] +: +- LocalRelation [col1#x, col2#x] +:- CTERelationDef xxxx, false +: +- SubqueryAlias T +: +- Project [col1#x AS c1#x, col2#x AS c2#x] +: +- LocalRelation [col1#x, col2#x] ++- Aggregate [max(c1#x) AS max(c1)#x] + +- SubqueryAlias T + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false, 2 + + +-- !query +WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T') +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias S +: +- Project [col1#x AS c1#x, col2#x AS c2#x] +: +- LocalRelation [col1#x, col2#x] +:- CTERelationDef xxxx, false +: +- SubqueryAlias T +: +- Project [col1#x AS c1#x, col2#x AS c2#x] +: +- LocalRelation [col1#x, col2#x] ++- Aggregate [max(c1#x) AS max(c1)#x] + +- SubqueryAlias T + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false, 2 + + +-- !query +WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC') +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias ABC +: +- Project [col1#x AS c1#x, col2#x AS c2#x] +: +- LocalRelation [col1#x, col2#x] ++- Aggregate [max(c1#x) AS max(c1)#x] + +- SubqueryAlias ABC + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false, 2 + + +-- !query +SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''x.win''", + 
"hint" : "" + } +} + + +-- !query +SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`t`", + "proposal" : "`c1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "IDENTIFIER('t')" + } ] +} + + +-- !query +SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''a''", + "hint" : "" + } +} + + +-- !query +SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''a''", + "hint" : "" + } +} + + +-- !query +SELECT * FROM s.IDENTIFIER('tab') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('s').tab +-- !query analysis 
+org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''win''", + "hint" : "" + } +} + + +-- !query +SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing 'AS'" + } +} + + +-- !query +SELECT 1 AS IDENTIFIER('col1') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''my_table''", + "hint" : "" + } +} + + +-- !query +WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''v''", + "hint" : "" + } +} + + +-- !query +CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + 
+ +-- !query +SELECT c1 FROM v +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`v`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 16, + "fragment" : "v" + } ] +} + + +-- !query +CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing ')'" + } +} + + +-- !query +SELECT c1 FROM tab +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT col1 FROM tab +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 18, + "stopIndex" : 20, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER 
TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT c2 FROM tab +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM tab_renamed +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab_renamed`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 25, + "fragment" : "tab_renamed" + } ] +} + + +-- !query +CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE IF EXISTS test_col_with_dot +-- !query analysis +DropTable true, false 
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_col_with_dot + + +-- !query +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''schema.table''", + "hint" : "" + } +} + + +-- !query +SELECT 1 AS IDENTIFIER('col1.col2') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_show`, false + + +-- !query +SHOW VIEWS IN IDENTIFIER('default') +-- !query analysis +ShowViewsCommand default, [namespace#x, viewName#x, isTemporary#x] + + +-- !query +SHOW PARTITIONS IDENTIFIER('test_show') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "`spark_catalog`.`default`.`test_show`" + } +} + + +-- !query +SHOW CREATE TABLE IDENTIFIER('test_show') +-- !query analysis +ShowCreateTable false, [createtab_stmt#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), default.test_show, V1Table(default.test_show), [c1#x, c2#x] + + +-- !query +DROP TABLE test_show +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_show + + +-- !query +CREATE TABLE test_desc(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_desc`, false + + +-- !query +DESCRIBE TABLE IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +DESCRIBE FORMATTED 
IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DESCRIBE EXTENDED IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DESC IDENTIFIER('test_desc') +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE test_desc +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_desc + + +-- !query +CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`test_comment`, false + + +-- !query +COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' +-- !query analysis +CommentOnTable table comment ++- ResolvedTable V2SessionCatalog(spark_catalog), default.test_comment, V1Table(default.test_comment), [c1#x, c2#x] + + +-- !query +ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE test_comment +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_comment + + +-- !query +CREATE SCHEMA test_schema +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] + + +-- !query +CREATE TABLE test_schema.test_table(c1 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`test_schema`.`test_table`, false + + +-- !query +ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +-- !query analysis +AnalyzeTableCommand 
`spark_catalog`.`test_schema`.`test_table`, false + + +-- !query +REFRESH TABLE IDENTIFIER('test_schema.test_table') +-- !query analysis +RefreshTableCommand `spark_catalog`.`test_schema`.`test_table` + + +-- !query +DESCRIBE IDENTIFIER('test_schema.test_table') +-- !query analysis +DescribeTableCommand `spark_catalog`.`test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +-- !query analysis +ShowColumnsCommand `spark_catalog`.`test_schema`.`test_table`, [col_name#x] + + +-- !query +DROP TABLE IDENTIFIER('test_schema.test_table') +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), test_schema.test_table + + +-- !query +DROP SCHEMA test_schema +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] + + +-- !query +DECLARE IDENTIFIER('my_var') = 'value' +-- !query analysis +CreateVariable defaultvalueexpression(value, 'value'), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.my_var + + +-- !query +SET VAR IDENTIFIER('my_var') = 'new_value' +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing EQ" + } +} + + +-- !query +SELECT IDENTIFIER('my_var') +-- !query analysis +Project [variablereference(system.session.my_var='value') AS variablereference(system.session.my_var='value')#x] ++- OneRowRelation + + +-- !query +DROP TEMPORARY VARIABLE IDENTIFIER('my_var') +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.my_var + + +-- !query +CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + 
length(IDENTIFIER('param2')) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT test_udf(5, 'hello') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`test_udf`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 27, + "fragment" : "test_udf(5, 'hello')" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION test_udf +-- !query analysis +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`test_udf`" + } +} + + +-- !query +CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result' +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM test_table_udf(42) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVABLE_TABLE_VALUED_FUNCTION", + "sqlState" : "42883", + "messageParameters" : { + "name" : "`test_table_udf`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 32, + "fragment" : "test_table_udf(42)" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION test_table_udf +-- !query analysis +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + 
"messageParameters" : { + "routineName" : "`test_table_udf`" + } +} + + +-- !query +BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +LEAVE IDENTIFIER('loop_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'LEAVE'", + "hint" : "" + } +} + + +-- !query +END LOOP loop_label +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +SELECT x +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "x" + } ] +} + + +-- !query +END IDENTIFIER('block_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + 
+-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END WHILE while_label +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('counter') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`counter`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "IDENTIFIER('counter')" + } ] +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" 
: "" + } +} + + +-- !query +repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'repeat_label'", + "hint" : "" + } +} + + +-- !query +UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'UNTIL'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('cnt') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`cnt`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('cnt')" + } ] +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT row.c1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END FOR IDENTIFIER('for_label') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + 
"error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXTRACT_BASE_FIELD_TYPE", + "sqlState" : "42000", + "messageParameters" : { + "base" : "\"variablereference(system.session.tab='T')\"", + "other" : "\"STRING\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1 +-- !query analysis +Project [c1#x.c2 AS c1.c2#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] + + +-- !query +CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test`, false + + +-- !query +INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] ++- Project [col1#x AS c1#x, col2#x AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- 
!query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val +-- !query analysis +Project [c1#x, c2#x] ++- Filter (c1#x = 1) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test2`, false + + +-- !query +INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test2], Append, `spark_catalog`.`default`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test2), [c1, c3] ++- Project [col1#x AS c1#x, col2#x AS c3#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 90, + "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord +-- !query analysis +Project [c1#x, c2#x, rn#x] ++- Project 
[c1#x, c2#x, rn#x, rn#x] + +- Window [row_number() windowspecdefinition(c2#x, c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#x], [c2#x], [c1#x ASC NULLS FIRST] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col +-- !query analysis +Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] ++- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2 +-- !query analysis +Sort [c1#x DESC NULLS LAST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing ')'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 88, + "fragment" : "INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 
'default' AS schema, 'my_table' AS table, 't' AS alias +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : ": extra input ':'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 145, + "fragment" : "SELECT IDENTIFIER(concat(:schema, '.', :table, '.c1')) FROM VALUES(named_struct('c1', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema '.' :table))" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 82, + "fragment" : "WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 91, + "fragment" : "CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + 
"errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`test_view`" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 30, + "stopIndex" : 46, + "fragment" : "IDENTIFIER(:view)" + } ] +} + + +-- !query +DROP VIEW test_view +-- !query analysis +org.apache.spark.sql.catalyst.analysis.NoSuchTableException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`test_view`" + } +} + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 64, + "fragment" : "ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 87, + "fragment" : "ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : 
"PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 49, + "fragment" : "SELECT map(:key, :val).IDENTIFIER(:key) AS result" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : ": extra input ':'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 75, + "fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab +-- !query analysis +Sort [c1#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'''", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER(:var_name) AS result +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "var_name" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + 
"startIndex" : 19, + "stopIndex" : 27, + "fragment" : ":var_name" + } ] +} + + +-- !query +END' + USING 'my_variable' AS var_name, 100 AS var_value +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias +-- !query analysis +Project [c1#x, c2#x] ++- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.default.integration_test + +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + + +-- !query +EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 40, + "fragment" : "SELECT 1 AS IDENTIFIER(:schema '.' 
:col)" + } ] +} + + +-- !query +DROP TABLE integration_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test + + +-- !query +DROP TABLE integration_test2 +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test2 diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause-legacy.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause-legacy.sql new file mode 100644 index 000000000000..ae1f10f1af1f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause-legacy.sql @@ -0,0 +1,2 @@ +--SET spark.sql.legacy.identifierClause = true +--IMPORT identifier-clause.sql diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out new file mode 100644 index 000000000000..1c546688e2d1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -0,0 +1,2891 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET hivevar:colname = 'c' +-- !query schema +struct +-- !query output +hivevar:colname 'c' + + +-- !query +SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) +-- !query schema +struct<:int> +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('c' || 
'1') FROM VALUES(1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE SCHEMA IF NOT EXISTS s +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE s.tab(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +USE SCHEMA s +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "DELETE", + "tableName" : "`spark_catalog`.`s`.`tab`" + } +} + + +-- !query +UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUnsupportedOperationException +{ + "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "messageParameters" : { + "ddl" : "UPDATE TABLE" + } +} + + +-- !query +MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 + WHEN MATCHED THEN UPDATE SET c1 = 3 +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkUnsupportedOperationException +{ + "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "messageParameters" : { + "ddl" : "MERGE INTO TABLE" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('tab') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT * FROM IDENTIFIER('s.tab') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT * FROM IDENTIFIER('`s`.`tab`') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT * FROM IDENTIFIER('t' || 'a' || 'b') +-- !query schema +struct +-- !query output +1 + + +-- !query +USE SCHEMA default +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE s.tab +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA s 
+-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1) +-- !query schema +struct +-- !query output +0 + + +-- !query +CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA identifier_clauses +-- !query schema +struct<> +-- !query output + + + +-- !query +USE identifier_clauses +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +REPLACE TABLE IDENTIFIER('identifier_clauses.' 
|| 'tab')(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", + "messageParameters" : { + "operation" : "REPLACE TABLE", + "tableName" : "`spark_catalog`.`identifier_clauses`.`tab`" + } +} + + +-- !query +CACHE TABLE IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +UNCACHE TABLE IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +USE default +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA identifier_clauses +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tab(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO tab VALUES (1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT c1 FROM tab +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE IDENTIFIER('ta' || 'b') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b') +-- !query schema +struct +-- !query output + + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('ta' || 'b') +-- !query schema +struct +-- !query output +c1 +c2 + + +-- !query +COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello' +-- !query schema +struct<> +-- !query output + + + +-- !query +REFRESH TABLE IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +REPAIR TABLE IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + 
"errorClass" : "NOT_A_PARTITIONED_TABLE", + "sqlState" : "42809", + "messageParameters" : { + "operation" : "MSCK REPAIR TABLE", + "tableIdentWithDB" : "`spark_catalog`.`default`.`tab`" + } +} + + +-- !query +TRUNCATE TABLE IDENTIFIER('ta' || 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS tab +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +ALTER VIEW IDENTIFIER('v') AS VALUES(2) +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IDENTIFIER('v') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IDENTIFIER('v') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA IDENTIFIER('id' || 'ent') +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER SCHEMA IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue') +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 'someloc' +-- !query schema +struct<> +-- !query output + + + +-- !query +COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE SCHEMA IDENTIFIER('id' || 'ent') +-- !query schema +struct +-- !query output +Catalog Name spark_catalog +Comment some comment +Location [not included in comparison]/{warehouse_dir}/someloc +Namespace Name ident +Owner [not included in comparison] + + +-- !query +SHOW TABLES IN IDENTIFIER('id' || 'ent') +-- !query schema +struct +-- !query output + + + +-- !query +SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello' +-- !query schema +struct +-- !query output + + + +-- !query 
+USE IDENTIFIER('id' || 'ent') +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CURRENT SCHEMA +-- !query schema +struct +-- !query output +spark_catalog ident + + +-- !query +USE SCHEMA IDENTIFIER('id' || 'ent') +-- !query schema +struct<> +-- !query output + + + +-- !query +USE SCHEMA default +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA IDENTIFIER('id' || 'ent') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA ident +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') +-- !query schema +struct +-- !query output +Class: test.org.apache.spark.sql.MyDoubleAvg +Function: spark_catalog.ident.mydoubleavg +Usage: N/A. + + +-- !query +REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA ident +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE var = 'sometable' +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +SET VAR var = 'c1' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SET VAR var = 'some' +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IDENTIFIER(var || 'table') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'1'", + "hint" : ": extra input '1'" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('c 1')" + } ] +} + + +-- !query +SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_EMPTY_STATEMENT", + "sqlState" : "42617", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 21, + "fragment" : "IDENTIFIER('')" + } ] +} + + +-- !query +VALUES(IDENTIFIER(CAST(NULL AS STRING))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ 
+ "errorClass" : "NOT_A_CONSTANT_STRING.NULL", + "sqlState" : "42601", + "messageParameters" : { + "expr" : "CAST(NULL AS STRING)", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 38, + "fragment" : "CAST(NULL AS STRING)" + } ] +} + + +-- !query +VALUES(IDENTIFIER(1)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 19, + "fragment" : "1" + } ] +} + + +-- !query +VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.NOT_CONSTANT", + "sqlState" : "42601", + "messageParameters" : { + "expr" : "substr('HELLO', 1, CAST((rand() + CAST(1 AS DOUBLE)) AS INT))", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 48, + "fragment" : "SUBSTR('HELLO', 1, RAND() + 1)" + } ] +} + + +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + +-- !query +CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : 
"NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 25, + "stopIndex" : 25, + "fragment" : "1" + } ] +} + + +-- !query +CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +DROP TABLE IDENTIFIER('a.b.c') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +DROP VIEW IDENTIFIER('a.b.c') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "REQUIRES_SINGLE_PART_NAMESPACE", + "sqlState" : "42K05", + "messageParameters" : { + "namespace" : "`a`.`b`.`c`", + "sessionCatalog" : "spark_catalog" + } +} + + +-- !query +VALUES(IDENTIFIER(1)()) +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NOT_A_CONSTANT_STRING.WRONG_TYPE", + "sqlState" : "42601", + "messageParameters" : { + "dataType" : "int", + "expr" : "1", + "name" : "IDENTIFIER" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 19, + "fragment" : "1" + } ] +} + + +-- !query +VALUES(IDENTIFIER('a.b.c.d')()) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", + "sqlState" : "42601", + "messageParameters" : { + "identifier" : "`a`.`b`.`c`.`d`", + "limit" : "2" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 30, + "fragment" : "IDENTIFIER('a.b.c.d')()" + } ] +} + + +-- !query +CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE", + "sqlState" : "42000", + "messageParameters" : { + "database" : "`default`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 108, + "fragment" : "CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg'" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "INVALID_SQL_SYNTAX.MULTI_PART_NAME", + "sqlState" : "42000", + "messageParameters" : { + "name" : "`default`.`myDoubleAvg`", + "statement" : "DROP TEMPORARY FUNCTION" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 63, + "fragment" : "DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg')" + } ] +} + + +-- !query 
+CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS", + "sqlState" : "428EK", + "messageParameters" : { + "actualName" : "`default`.`v`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 62, + "fragment" : "CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1)" + } ] +} + + +-- !query +create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop view v1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE agg = 'max' +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE col = 'c1' +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE tab = 'T' +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab) +-- !query schema +struct +-- !query output +c + + +-- !query +WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT 
IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T') +-- !query schema +struct +-- !query output +c + + +-- !query +WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''x.win''", + "hint" : "" + } +} + + +-- !query +SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`t`", + "proposal" : "`c1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 22, + "fragment" : "IDENTIFIER('t')" + } ] +} + + +-- !query +SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''a''", + "hint" : "" + } +} + + +-- !query +SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", 
+ "messageParameters" : { + "error" : "''a''", + "hint" : "" + } +} + + +-- !query +SELECT * FROM s.IDENTIFIER('tab') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +SELECT * FROM IDENTIFIER('s').tab +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''win''", + "hint" : "" + } +} + + +-- !query +SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing 'AS'" + } +} + + +-- !query +SELECT 1 AS IDENTIFIER('col1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT my_table.* FROM VALUES (1, 2) AS 
IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''my_table''", + "hint" : "" + } +} + + +-- !query +WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''v''", + "hint" : "" + } +} + + +-- !query +CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT c1 FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`v`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 16, + "fragment" : "v" + } ] +} + + +-- !query +CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing ')'" + } +} + + +-- !query +SELECT c1 FROM tab +-- !query schema +struct<> +-- !query 
output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT col1 FROM tab +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 18, + "stopIndex" : 20, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT c2 FROM tab +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 18, + "fragment" : "tab" + } ] +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + 
"sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM tab_renamed +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`tab_renamed`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 25, + "fragment" : "tab_renamed" + } ] +} + + +-- !query +CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE IF EXISTS test_col_with_dot +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''schema.table''", + "hint" : "" + } +} + + +-- !query +SELECT 1 AS IDENTIFIER('col1.col2') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW 
VIEWS IN IDENTIFIER('default') +-- !query schema +struct +-- !query output + + + +-- !query +SHOW PARTITIONS IDENTIFIER('test_show') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", + "sqlState" : "42601", + "messageParameters" : { + "name" : "`spark_catalog`.`default`.`test_show`" + } +} + + +-- !query +SHOW CREATE TABLE IDENTIFIER('test_show') +-- !query schema +struct +-- !query output +CREATE TABLE spark_catalog.default.test_show ( + c1 INT, + c2 STRING) +USING CSV + + +-- !query +DROP TABLE test_show +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_desc(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE TABLE IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +DESCRIBE FORMATTED IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table test_desc +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider CSV +Location [not included in comparison]/{warehouse_dir}/test_desc + + +-- !query +DESCRIBE EXTENDED IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table test_desc +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type MANAGED +Provider CSV +Location [not included in comparison]/{warehouse_dir}/test_desc + + +-- !query +DESC IDENTIFIER('test_desc') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +DROP TABLE test_desc +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV +-- !query schema 
+struct<> +-- !query output + + + +-- !query +COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE test_comment +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE test_schema.test_table(c1 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +-- !query schema +struct<> +-- !query output + + + +-- !query +REFRESH TABLE IDENTIFIER('test_schema.test_table') +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE IDENTIFIER('test_schema.test_table') +-- !query schema +struct +-- !query output +c1 int + + +-- !query +SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +-- !query schema +struct +-- !query output +c1 + + +-- !query +DROP TABLE IDENTIFIER('test_schema.test_table') +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +DECLARE IDENTIFIER('my_var') = 'value' +-- !query schema +struct<> +-- !query output + + + +-- !query +SET VAR IDENTIFIER('my_var') = 'new_value' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing EQ" + } +} + + +-- !query +SELECT IDENTIFIER('my_var') +-- !query schema +struct +-- !query output +value + + +-- !query +DROP TEMPORARY 
VARIABLE IDENTIFIER('my_var') +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT test_udf(5, 'hello') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`test_udf`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 27, + "fragment" : "test_udf(5, 'hello')" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION test_udf +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`test_udf`" + } +} + + +-- !query +CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM test_table_udf(42) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVABLE_TABLE_VALUED_FUNCTION", + "sqlState" : "42883", + "messageParameters" : { + "name" : "`test_table_udf`" + }, + 
"queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 32, + "fragment" : "test_table_udf(42)" + } ] +} + + +-- !query +DROP TEMPORARY FUNCTION test_table_udf +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException +{ + "errorClass" : "ROUTINE_NOT_FOUND", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`test_table_udf`" + } +} + + +-- !query +BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +LEAVE IDENTIFIER('loop_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'LEAVE'", + "hint" : "" + } +} + + +-- !query +END LOOP loop_label +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +SELECT x +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "x" + } ] +} + + +-- !query +END IDENTIFIER('block_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END WHILE while_label +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('counter') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : 
"UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`counter`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 28, + "fragment" : "IDENTIFIER('counter')" + } ] +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "end of input", + "hint" : "" + } +} + + +-- !query +repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'repeat_label'", + "hint" : "" + } +} + + +-- !query +UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'UNTIL'", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER('cnt') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`cnt`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "IDENTIFIER('cnt')" + } ] +} + + +-- !query +END +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT row.c1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'IDENTIFIER'", + "hint" : "" + } +} + + +-- !query +END FOR IDENTIFIER('for_label') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +END +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXTRACT_BASE_FIELD_TYPE", + "sqlState" : "42000", + "messageParameters" : { + "base" : "\"variablereference(system.session.tab='T')\"", + "other" : "\"STRING\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1 +-- !query schema +struct +-- !query output +42 + + +-- !query +CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS 
schema, 'integration_test' AS table +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val +-- !query schema +struct +-- !query output +1 a + + +-- !query +CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') +-- !query schema +struct<> +-- !query output + + + +-- !query +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 90, + "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord +-- !query schema +struct +-- !query output +1 a 1 +2 b 1 + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col +-- !query schema +struct +-- !query output +a 1 +b 1 + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM 
integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2 +-- !query schema +struct +-- !query output +2 b +1 a + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : ": missing ')'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 88, + "fragment" : "INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : ": extra input ':'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 145, + "fragment" : "SELECT IDENTIFIER(concat(:schema, '.', :table, '.c1')) FROM VALUES(named_struct('c1', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema '.' 
:table))" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 82, + "fragment" : "WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 91, + "fragment" : "CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`test_view`" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 30, + "stopIndex" : 46, + "fragment" : "IDENTIFIER(:view)" + } ] +} + + +-- !query +DROP VIEW test_view +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.analysis.NoSuchTableException +{ + "errorClass" : 
"TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`spark_catalog`.`default`.`test_view`" + } +} + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 64, + "fragment" : "ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 87, + "fragment" : "ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 49, + "fragment" : "SELECT map(:key, :val).IDENTIFIER(:key) AS result" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT 
IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "':'", + "hint" : ": extra input ':'" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 75, + "fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'''", + "hint" : "" + } +} + + +-- !query +SELECT IDENTIFIER(:var_name) AS result +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "var_name" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 27, + "fragment" : ":var_name" + } ] +} + + +-- !query +END' + USING 'my_variable' AS var_name, 100 AS var_value +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'END'", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema 
''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias +-- !query schema +struct +-- !query output +1 a +2 b + + +-- !query +EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 40, + "fragment" : "SELECT 1 AS IDENTIFIER(:schema '.' :col)" + } ] +} + + +-- !query +DROP TABLE integration_test +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE integration_test2 +-- !query schema +struct<> +-- !query output + From 87cffc85b38999f569f9f0ee13574e8497153cb0 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 12:09:47 -0800 Subject: [PATCH 06/37] Fix SQL Scripting --- .../resources/error/error-conditions.json | 5 - .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../sql/catalyst/parser/ParserUtils.scala | 60 ++- .../sql/errors/QueryExecutionErrors.scala | 8 +- .../spark/sql/execution/SparkStrategies.scala | 37 +- .../identifier-clause-legacy.sql.out | 386 +--------------- .../identifier-clause.sql.out | 386 +--------------- .../sql-tests/inputs/identifier-clause.sql | 50 -- .../results/identifier-clause-legacy.sql.out | 434 +----------------- .../results/identifier-clause.sql.out | 434 +----------------- .../sql/connector/DataSourceV2SQLSuite.scala | 12 +- .../sql/scripting/SqlScriptingE2eSuite.scala | 62 +++ 12 files changed, 190 insertions(+), 1686 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index e7334b92983c..92499a861b16 100644 --- 
a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -8473,11 +8473,6 @@ "Failed to merge incompatible schemas and ." ] }, - "_LEGACY_ERROR_TEMP_2096" : { - "message" : [ - " is not supported temporarily." - ] - }, "_LEGACY_ERROR_TEMP_2097" : { "message" : [ "Could not execute broadcast in secs. You can increase the timeout for broadcasts via or disable broadcast join by setting to -1 or remove the broadcast hint if it exists in your code." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 773f95109009..b355964a781d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -603,7 +603,7 @@ class AstBuilder extends DataTypeAstBuilder SingleStatement(visitQuery(queryCtx)) } parsingCtx.labelContext.enterForScope(Option(ctx.multipartIdentifier())) - val varName = Option(ctx.multipartIdentifier()).map(_.getText) + val varName = Option(ctx.multipartIdentifier()).map(ParserUtils.getMultipartIdentifierText) val body = visitCompoundBodyImpl( ctx.compoundBody(), None, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index a19b4cca2817..12aec6d7f797 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -20,6 +20,7 @@ import java.util import java.util.Locale import scala.collection.{immutable, mutable} +import scala.jdk.CollectionConverters._ import scala.util.matching.Regex import org.antlr.v4.runtime.{ParserRuleContext, Token} @@ -47,6 +48,38 @@ object ParserUtils extends 
SparkParserUtils { throw QueryParsingErrors.invalidStatementError(statement, ctx) } + /** + * Gets the resolved text of a multipart identifier, handling IDENTIFIER('literal') syntax. + * This parses each part through CatalystSqlParser to resolve identifier-lite expressions. + */ + def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { + // Build the text by resolving each part + val parts = ctx.parts.asScala.flatMap { part => + val partText = part.getText + // Check if this looks like IDENTIFIER('...') + if (partText.startsWith("IDENTIFIER(") && partText.endsWith(")")) { + // Extract the literal string between the parentheses + val literal = partText.substring("IDENTIFIER(".length, partText.length - 1) + // Remove quotes and unescape + val unquoted = if (literal.startsWith("'") && literal.endsWith("'")) { + literal.substring(1, literal.length - 1).replace("''", "'") + } else { + literal + } + // Parse as multipart identifier + try { + CatalystSqlParser.parseMultipartIdentifier(unquoted) + } catch { + case _: Exception => Seq(partText) + } + } else { + // Regular identifier + Seq(partText) + } + }.mkString(".") + parts + } + def checkDuplicateClauses[T]( nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = { if (nodes.size() > 1) { @@ -285,13 +318,15 @@ class SqlScriptingLabelContext { (beginLabelCtx, endLabelCtx) match { // Throw an error if labels do not match. 
case (Some(bl: BeginLabelContext), Some(el: EndLabelContext)) - if bl.multipartIdentifier().getText.toLowerCase(Locale.ROOT) != - el.multipartIdentifier().getText.toLowerCase(Locale.ROOT) => + if ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) + .toLowerCase(Locale.ROOT) != + ParserUtils.getMultipartIdentifierText(el.multipartIdentifier()) + .toLowerCase(Locale.ROOT) => withOrigin(bl) { throw SqlScriptingErrors.labelsMismatch( CurrentOrigin.get, - bl.multipartIdentifier().getText, - el.multipartIdentifier().getText) + ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()), + ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) } // Throw an error if label is qualified. case (Some(bl: BeginLabelContext), _) @@ -299,14 +334,15 @@ class SqlScriptingLabelContext { withOrigin(bl) { throw SqlScriptingErrors.labelCannotBeQualified( CurrentOrigin.get, - bl.multipartIdentifier().getText.toLowerCase(Locale.ROOT) + ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) + .toLowerCase(Locale.ROOT) ) } // Throw an error if end label exists without begin label. case (None, Some(el: EndLabelContext)) => withOrigin(el) { throw SqlScriptingErrors.endLabelWithoutBeginLabel( - CurrentOrigin.get, el.multipartIdentifier().getText) + CurrentOrigin.get, ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) } case _ => } @@ -324,7 +360,7 @@ class SqlScriptingLabelContext { private def assertIdentifierNotInSeenLabels( identifierCtx: Option[MultipartIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ctx.getText + val identifierName = ParserUtils.getMultipartIdentifierText(ctx) if (seenLabels.contains(identifierName.toLowerCase(Locale.ROOT))) { withOrigin(ctx) { throw SqlScriptingErrors @@ -348,7 +384,8 @@ class SqlScriptingLabelContext { // Get label text and add it to seenLabels. 
val labelText = if (isLabelDefined(beginLabelCtx)) { - val txt = beginLabelCtx.get.multipartIdentifier().getText.toLowerCase(Locale.ROOT) + val txt = ParserUtils.getMultipartIdentifierText( + beginLabelCtx.get.multipartIdentifier()).toLowerCase(Locale.ROOT) if (seenLabels.contains(txt)) { withOrigin(beginLabelCtx.get) { throw SqlScriptingErrors.duplicateLabels(CurrentOrigin.get, txt) @@ -374,7 +411,8 @@ class SqlScriptingLabelContext { */ def exitLabeledScope(beginLabelCtx: Option[BeginLabelContext]): Unit = { if (isLabelDefined(beginLabelCtx)) { - seenLabels.remove(beginLabelCtx.get.multipartIdentifier().getText.toLowerCase(Locale.ROOT)) + seenLabels.remove(ParserUtils.getMultipartIdentifierText( + beginLabelCtx.get.multipartIdentifier()).toLowerCase(Locale.ROOT)) } } @@ -385,7 +423,7 @@ class SqlScriptingLabelContext { */ def enterForScope(identifierCtx: Option[MultipartIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ctx.getText + val identifierName = ParserUtils.getMultipartIdentifierText(ctx) assertIdentifierNotInSeenLabels(identifierCtx) seenLabels.add(identifierName.toLowerCase(Locale.ROOT)) @@ -405,7 +443,7 @@ class SqlScriptingLabelContext { */ def exitForScope(identifierCtx: Option[MultipartIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ctx.getText + val identifierName = ParserUtils.getMultipartIdentifierText(ctx) seenLabels.remove(identifierName.toLowerCase(Locale.ROOT)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 5f5e1da47184..5176ce0d4805 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1109,10 +1109,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE cause = e) } - 
def ddlUnsupportedTemporarilyError(ddl: String): SparkUnsupportedOperationException = { + def ddlUnsupportedTemporarilyError( + ddl: String, + tableName: String): SparkUnsupportedOperationException = { new SparkUnsupportedOperationException( - errorClass = "_LEGACY_ERROR_TEMP_2096", - messageParameters = Map("ddl" -> ddl)) + errorClass = "UNSUPPORTED_FEATURE.TABLE_OPERATION", + messageParameters = Map("tableName" -> toSQLId(tableName), "operation" -> ddl)) } def executeBroadcastTimeoutError(timeout: Long, ex: Option[TimeoutException]): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 3c36d3e2d417..9df357d1f270 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -1091,10 +1091,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering, r.stream) :: Nil - case _: UpdateTable => - throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("UPDATE TABLE") - case _: MergeIntoTable => - throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("MERGE INTO TABLE") + case u: UpdateTable => + val tableName = extractTableNameForError(u.table) + throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("UPDATE TABLE", tableName) + case m: MergeIntoTable => + val tableName = extractTableNameForError(m.targetTable) + throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("MERGE INTO TABLE", tableName) case logical.CollectMetrics(name, metrics, child, _) => execution.CollectMetricsExec(name, metrics, planLater(child)) :: Nil case WriteFiles(child, fileFormat, partitionColumns, bucket, options, staticPartitions) => @@ -1105,4 +1107,31 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { 
case _ => Nil } } + + /** + * Extracts a user-friendly table name from a logical plan for error messages. + */ + private def extractTableNameForError(table: LogicalPlan): String = { + import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NamedRelation} + import org.apache.spark.sql.execution.datasources.LogicalRelation + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val unwrapped = EliminateSubqueryAliases(table) + unwrapped match { + // Check specific types before NamedRelation since they extend it + case DataSourceV2Relation(_, _, catalog, Some(ident), _, _) => + (catalog.map(_.name()).toSeq ++ ident.asMultipartIdentifier).mkString(".") + case LogicalRelation(_, _, Some(catalogTable), _, _) => + catalogTable.identifier.unquotedString + case r: NamedRelation => + r.name + case _ => + // Try to get name from SubqueryAlias before unwrapping + table match { + case logical.SubqueryAlias(name, _) => name.toString + case _ => "table" + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index bfdaa6f3a864..06919b86929e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -107,9 +107,11 @@ UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2 -- !query analysis org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "UPDATE TABLE" + "operation" : "UPDATE TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -120,9 +122,11 @@ MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s. 
-- !query analysis org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "MERGE INTO TABLE" + "operation" : "MERGE INTO TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -1791,329 +1795,6 @@ org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException } --- !query -BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -LEAVE IDENTIFIER('loop_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'LEAVE'", - "hint" : "" - } -} - - --- !query -END LOOP loop_label --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -SELECT x --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`x`" - }, - "queryContext" : [ { - 
"objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "x" - } ] -} - - --- !query -END IDENTIFIER('block_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -END WHILE while_label --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('counter') --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`counter`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 28, - "fragment" : "IDENTIFIER('counter')" - } ] -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : 
"PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'repeat_label'", - "hint" : "" - } -} - - --- !query -UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'UNTIL'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('cnt') --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`cnt`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "IDENTIFIER('cnt')" - } ] -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT row.c1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -END FOR IDENTIFIER('for_label') --- !query 
analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab -- !query analysis @@ -2469,57 +2150,6 @@ Sort [c1#x ASC NULLS FIRST], true +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv --- !query -EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'''", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER(:var_name) AS result --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNBOUND_SQL_PARAMETER", - "sqlState" : "42P02", - "messageParameters" : { - "name" : "var_name" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 27, - "fragment" : ":var_name" - } ] -} - - --- !query -END' - USING 'my_variable' AS var_name, 100 AS var_value --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 3c795a14c102..637fa9d95573 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -107,9 +107,11 @@ UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2 -- !query analysis org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "UPDATE TABLE" + "operation" : "UPDATE TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -120,9 +122,11 @@ MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s. -- !query analysis org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "MERGE INTO TABLE" + "operation" : "MERGE INTO TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -1648,329 +1652,6 @@ DROP TEMPORARY FUNCTION test_table_udf DropFunctionCommand test_table_udf, false, true --- !query -BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -LEAVE IDENTIFIER('loop_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'LEAVE'", - "hint" : "" - } -} - - --- !query -END LOOP loop_label --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : 
"" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -SELECT x --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`x`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "x" - } ] -} - - --- !query -END IDENTIFIER('block_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" 
- } -} - - --- !query -END WHILE while_label --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('counter') --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`counter`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 28, - "fragment" : "IDENTIFIER('counter')" - } ] -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'repeat_label'", - "hint" : "" - } -} - - --- !query -UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'UNTIL'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('cnt') --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - 
"objectName" : "`cnt`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "IDENTIFIER('cnt')" - } ] -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT row.c1 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -END FOR IDENTIFIER('for_label') --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab -- !query analysis @@ -2235,57 +1916,6 @@ Sort [c1#x ASC NULLS FIRST], true +- Relation spark_catalog.default.integration_test[c1#x,c2#x,c4#x] csv --- !query -EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'''", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER(:var_name) AS result --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNBOUND_SQL_PARAMETER", - "sqlState" : "42P02", - "messageParameters" : { - 
"name" : "var_name" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 27, - "fragment" : ":var_name" - } ] -} - - --- !query -END' - USING 'my_variable' AS var_name, 100 AS var_value --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 221025587de3..1c571cfd316e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -247,48 +247,6 @@ RETURN SELECT IDENTIFIER('input_val'), 'result'; SELECT * FROM test_table_udf(42); DROP TEMPORARY FUNCTION test_table_udf; --- SQL Script labels with identifier-lite -BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1; - LEAVE IDENTIFIER('loop_label'); - END LOOP loop_label; -END; - --- SQL Script with labeled BEGIN/END block -BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1; - SELECT x; - END IDENTIFIER('block_label'); -END; - --- WHILE loop with identifier-lite label -BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0; - IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1; - END WHILE while_label; - SELECT IDENTIFIER('counter'); -END; - --- REPEAT loop with identifier-lite label -BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0; - repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1; - UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label'); - SELECT IDENTIFIER('cnt'); 
-END; - --- FOR loop with identifier-lite -BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT row.c1; - END FOR IDENTIFIER('for_label'); -END; - -- Integration tests: Combining parameter markers, string coalescing, and IDENTIFIER -- These tests demonstrate the power of combining IDENTIFIER with parameters @@ -370,14 +328,6 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab; --- Test 18: IDENTIFIER in DECLARE and SELECT within EXECUTE IMMEDIATE -EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value; - SELECT IDENTIFIER(:var_name) AS result; - END' - USING 'my_variable' AS var_name, 100 AS var_value; - -- Test 19: IDENTIFIER with qualified name coalescing for schema.table.column pattern -- This should work for multi-part identifiers EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index 1c546688e2d1..5d35d7940a84 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -118,9 +118,11 @@ struct<> -- !query output org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "UPDATE TABLE" + "operation" : "UPDATE TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -133,9 +135,11 @@ struct<> -- !query output org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + 
"errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "MERGE INTO TABLE" + "operation" : "MERGE INTO TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -2037,371 +2041,6 @@ org.apache.spark.sql.catalyst.analysis.NoSuchTempFunctionException } --- !query -BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -LEAVE IDENTIFIER('loop_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'LEAVE'", - "hint" : "" - } -} - - --- !query -END LOOP loop_label --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -SELECT x --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`x`" - 
}, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "x" - } ] -} - - --- !query -END IDENTIFIER('block_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -END WHILE while_label --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('counter') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`counter`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - 
"stopIndex" : 28, - "fragment" : "IDENTIFIER('counter')" - } ] -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'repeat_label'", - "hint" : "" - } -} - - --- !query -UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'UNTIL'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('cnt') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`cnt`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "IDENTIFIER('cnt')" - } ] -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - IDENTIFIER('for_label'): FOR 
IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT row.c1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -END FOR IDENTIFIER('for_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab -- !query schema @@ -2784,63 +2423,6 @@ struct 2 b --- !query -EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'''", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER(:var_name) AS result --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNBOUND_SQL_PARAMETER", - "sqlState" : "42P02", - "messageParameters" : { - "name" : "var_name" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 27, - "fragment" : ":var_name" - } ] -} - - --- !query -END' - USING 'my_variable' AS var_name, 100 AS var_value --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : 
{ - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 387b4e04af4a..886f3cf83bb1 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -118,9 +118,11 @@ struct<> -- !query output org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "UPDATE TABLE" + "operation" : "UPDATE TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -133,9 +135,11 @@ struct<> -- !query output org.apache.spark.SparkUnsupportedOperationException { - "errorClass" : "_LEGACY_ERROR_TEMP_2096", + "errorClass" : "UNSUPPORTED_FEATURE.TABLE_OPERATION", + "sqlState" : "0A000", "messageParameters" : { - "ddl" : "MERGE INTO TABLE" + "operation" : "MERGE INTO TABLE", + "tableName" : "`spark_catalog`.`s`.`tab`" } } @@ -1834,371 +1838,6 @@ struct<> --- !query -BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -LEAVE IDENTIFIER('loop_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'LEAVE'", - "hint" : "" - } -} - - --- !query -END LOOP loop_label --- !query 
schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -SELECT x --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`x`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 8, - "fragment" : "x" - } ] -} - - --- !query -END IDENTIFIER('block_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : 
"end of input", - "hint" : "" - } -} - - --- !query -IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'IDENTIFIER'", - "hint" : "" - } -} - - --- !query -END WHILE while_label --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('counter') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`counter`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 28, - "fragment" : "IDENTIFIER('counter')" - } ] -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'repeat_label'", - "hint" : "" - } -} - - --- 
!query -UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'UNTIL'", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER('cnt') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`cnt`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "IDENTIFIER('cnt')" - } ] -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT row.c1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "end of input", - "hint" : "" - } -} - - --- !query -END FOR IDENTIFIER('for_label') --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - --- !query -END --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab -- 
!query schema @@ -2465,63 +2104,6 @@ struct 3 c --- !query -EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'''", - "hint" : "" - } -} - - --- !query -SELECT IDENTIFIER(:var_name) AS result --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNBOUND_SQL_PARAMETER", - "sqlState" : "42P02", - "messageParameters" : { - "name" : "var_name" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 27, - "fragment" : ":var_name" - } ] -} - - --- !query -END' - USING 'my_variable' AS var_name, 100 AS var_value --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'END'", - "hint" : "" - } -} - - -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 21538ec8e44a..18a04cee7d92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2312,8 +2312,10 @@ class DataSourceV2SQLSuiteV1Filter exception = intercept[SparkUnsupportedOperationException] { sql(s"UPDATE $t SET name='Robert', age=32 WHERE p=1") }, - condition = "_LEGACY_ERROR_TEMP_2096", - parameters = Map("ddl" -> "UPDATE TABLE") + condition = 
"UNSUPPORTED_FEATURE.TABLE_OPERATION", + parameters = Map( + "tableName" -> "`testcat`.`ns1`.`ns2`.`tbl`", + "operation" -> "UPDATE TABLE") ) } } @@ -2418,8 +2420,10 @@ class DataSourceV2SQLSuiteV1Filter |WHEN MATCHED AND (target.p > 0) THEN UPDATE SET * |WHEN NOT MATCHED THEN INSERT *""".stripMargin) }, - condition = "_LEGACY_ERROR_TEMP_2096", - parameters = Map("ddl" -> "MERGE INTO TABLE")) + condition = "UNSUPPORTED_FEATURE.TABLE_OPERATION", + parameters = Map( + "tableName" -> "`testcat`.`ns1`.`ns2`.`target`", + "operation" -> "MERGE INTO TABLE")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala index 02b4a19a44ad..9996bec44e23 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala @@ -273,4 +273,66 @@ class SqlScriptingE2eSuite extends QueryTest with SharedSparkSession { condition = "INVALID_QUERY_MIXED_QUERY_PARAMETERS", parameters = Map()) } + + test("SQL Script labels with identifier") { + val sqlScript = + """ + |BEGIN + | IDENTIFIER('loop_label'): LOOP + | SELECT 1; + | LEAVE IDENTIFIER('loop_label'); + | END LOOP IDENTIFIER('loop_label'); + |END""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(1))) + } + + test("SQL Script with labeled BEGIN/END block using identifier") { + val sqlScript = + """ + |BEGIN + | IDENTIFIER('block_label'): BEGIN + | DECLARE IDENTIFIER('x') INT DEFAULT 1; + | SELECT x; + | END IDENTIFIER('block_label'); + |END""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(1))) + } + + test("WHILE loop with identifier label") { + val sqlScript = + """ + |BEGIN + | DECLARE counter INT DEFAULT 0; + | IDENTIFIER('while_label'): WHILE counter < 3 DO + | SET IDENTIFIER('counter') = counter + 1; + | END WHILE IDENTIFIER('while_label'); + | SELECT counter; + 
|END""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(3))) + } + + test("REPEAT loop with identifier label") { + val sqlScript = + """ + |BEGIN + | DECLARE cnt INT DEFAULT 0; + | repeat_label: REPEAT + | SET cnt = cnt + 1; + | UNTIL cnt >= 2 + | END REPEAT IDENTIFIER('repeat_label'); + | SELECT cnt; + |END""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(2))) + } + + test("FOR loop with identifier") { + val sqlScript = + """ + |BEGIN + | IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + | SELECT row.c1; + | END FOR for_label; + |END""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(1))) + } } From e10648c390ad5393e468922400ac709efe11ba01 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 15:04:04 -0800 Subject: [PATCH 07/37] Fix testcases, linting --- .../catalyst/parser/DataTypeAstBuilder.scala | 12 ++--- .../identifier-clause-legacy.sql.out | 46 ++++++++++++------- .../identifier-clause.sql.out | 23 ++++++---- .../sql-tests/inputs/identifier-clause.sql | 20 ++++---- .../results/identifier-clause-legacy.sql.out | 32 +++++++++---- .../results/identifier-clause.sql.out | 16 +++++++ 6 files changed, 102 insertions(+), 47 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 1440fad4ee35..171d1c021754 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -35,10 +35,10 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, /** * AST builder for parsing data type definitions and table schemas. 
* - * == CRITICAL: Extracting Identifier Names == + * ==CRITICAL: Extracting Identifier Names== * - * When extracting identifier names from parser contexts, you MUST use the helper methods - * provided by this class instead of calling ctx.getText() directly: + * When extracting identifier names from parser contexts, you MUST use the helper methods provided + * by this class instead of calling ctx.getText() directly: * * - '''getIdentifierText(ctx)''': For single identifiers (column names, aliases, window names) * - '''getIdentifierParts(ctx)''': For qualified identifiers (table names, schema.table) @@ -46,9 +46,9 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, * '''DO NOT use ctx.getText() or ctx.identifier.getText()''' directly! These methods do not * handle the IDENTIFIER('literal') syntax and will cause incorrect behavior. * - * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse - * time (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), - * you'll get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature. + * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse time + * (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), you'll + * get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature. 
* * Example: * {{{ diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index 06919b86929e..fdd90aa39489 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -1451,6 +1451,20 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +DROP TABLE IF EXISTS tab_renamed +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab_renamed + + +-- !query +DROP TABLE IF EXISTS tab +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab + + -- !query CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV -- !query analysis @@ -1611,54 +1625,54 @@ DropTable false, false -- !query -CREATE SCHEMA test_schema +CREATE SCHEMA identifier_clause_test_schema -- !query analysis CreateNamespace false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] -- !query -CREATE TABLE test_schema.test_table(c1 INT) USING CSV +CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`test_schema`.`test_table`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false -- !query -ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS -- !query analysis -AnalyzeTableCommand `spark_catalog`.`test_schema`.`test_table`, false +AnalyzeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false -- !query -REFRESH TABLE 
IDENTIFIER('test_schema.test_table') +REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -RefreshTableCommand `spark_catalog`.`test_schema`.`test_table` +RefreshTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table` -- !query -DESCRIBE IDENTIFIER('test_schema.test_table') +DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -DescribeTableCommand `spark_catalog`.`test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] -- !query -SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -ShowColumnsCommand `spark_catalog`.`test_schema`.`test_table`, [col_name#x] +ShowColumnsCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, [col_name#x] -- !query -DROP TABLE IDENTIFIER('test_schema.test_table') +DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), test_schema.test_table ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_table -- !query -DROP SCHEMA test_schema +DROP SCHEMA identifier_clause_test_schema -- !query analysis DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 637fa9d95573..048065fde17c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1350,14 +1350,7 @@ org.apache.spark.sql.AnalysisException -- !query ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') -- !query analysis -org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException -{ - "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS", - "sqlState" : "42P07", - "messageParameters" : { - "relationName" : "`default`.`tab_renamed`" - } -} +AlterTableRenameCommand `spark_catalog`.`default`.`tab`, `tab_renamed`, false -- !query @@ -1368,6 +1361,20 @@ Project [c1#x, c2#x] +- Relation spark_catalog.default.tab_renamed[c1#x,c2#x] csv +-- !query +DROP TABLE IF EXISTS tab_renamed +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab_renamed + + +-- !query +DROP TABLE IF EXISTS tab +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tab + + -- !query CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 1c571cfd316e..64fb2e069ba8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -182,6 +182,8 @@ SELECT c2 FROM tab; ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2'); ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed'); SELECT * FROM tab_renamed; +DROP TABLE IF EXISTS tab_renamed; +DROP TABLE IF EXISTS tab; -- Error because qualified names are not allowed CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV; @@ -215,15 +217,15 @@ COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment'; ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment'; DROP TABLE test_comment; --- 
Additional identifier-lite tests with qualified table names in various commands -CREATE SCHEMA test_schema; -CREATE TABLE test_schema.test_table(c1 INT) USING CSV; -ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS; -REFRESH TABLE IDENTIFIER('test_schema.test_table'); -DESCRIBE IDENTIFIER('test_schema.test_table'); -SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'); -DROP TABLE IDENTIFIER('test_schema.test_table'); -DROP SCHEMA test_schema; +-- Additional identifier tests with qualified table names in various commands +CREATE SCHEMA identifier_clause_test_schema; +CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV; +ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS; +REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table'); +DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table'); +SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table'); +DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table'); +DROP SCHEMA identifier_clause_test_schema; -- Session variables with identifier-lite DECLARE IDENTIFIER('my_var') = 'value'; diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index 5d35d7940a84..d0e8a679fa3b 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -1610,6 +1610,22 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +DROP TABLE IF EXISTS tab_renamed +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS tab +-- !query schema +struct<> +-- !query output + + + -- !query CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV -- !query schema @@ -1827,7 +1843,7 @@ struct<> -- !query -CREATE SCHEMA test_schema +CREATE SCHEMA 
identifier_clause_test_schema -- !query schema struct<> -- !query output @@ -1835,7 +1851,7 @@ struct<> -- !query -CREATE TABLE test_schema.test_table(c1 INT) USING CSV +CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query schema struct<> -- !query output @@ -1843,7 +1859,7 @@ struct<> -- !query -ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS -- !query schema struct<> -- !query output @@ -1851,7 +1867,7 @@ struct<> -- !query -REFRESH TABLE IDENTIFIER('test_schema.test_table') +REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct<> -- !query output @@ -1859,7 +1875,7 @@ struct<> -- !query -DESCRIBE IDENTIFIER('test_schema.test_table') +DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct -- !query output @@ -1867,7 +1883,7 @@ c1 int -- !query -SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct -- !query output @@ -1875,7 +1891,7 @@ c1 -- !query -DROP TABLE IDENTIFIER('test_schema.test_table') +DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct<> -- !query output @@ -1883,7 +1899,7 @@ struct<> -- !query -DROP SCHEMA test_schema +DROP SCHEMA identifier_clause_test_schema -- !query schema struct<> -- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 886f3cf83bb1..8cb82018c654 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -1476,6 +1476,22 @@ struct 1 NULL +-- !query +DROP TABLE IF EXISTS tab_renamed +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF 
EXISTS tab +-- !query schema +struct<> +-- !query output + + + -- !query CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV -- !query schema From 193f9460604f5d0273cf017cf52689f35081018e Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 16:40:55 -0800 Subject: [PATCH 08/37] Code review --- .../sql/catalyst/parser/ParserUtils.scala | 62 +++++++++++-------- .../identifier-clause.sql.out | 32 +++++----- .../results/identifier-clause.sql.out | 16 ++--- 3 files changed, 60 insertions(+), 50 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 12aec6d7f797..cf468797932c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -48,36 +48,42 @@ object ParserUtils extends SparkParserUtils { throw QueryParsingErrors.invalidStatementError(statement, ctx) } + private val IDENTIFIER_PREFIX = "IDENTIFIER(" + /** * Gets the resolved text of a multipart identifier, handling IDENTIFIER('literal') syntax. - * This parses each part through CatalystSqlParser to resolve identifier-lite expressions. + * This method parses each part through CatalystSqlParser to resolve identifier-lite + * expressions into their actual identifier names. + * + * @param ctx The multipart identifier context from the parse tree. + * @return The resolved identifier text as a dot-separated string. 
*/ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { - // Build the text by resolving each part - val parts = ctx.parts.asScala.flatMap { part => + ctx.parts.asScala.flatMap { part => val partText = part.getText - // Check if this looks like IDENTIFIER('...') - if (partText.startsWith("IDENTIFIER(") && partText.endsWith(")")) { - // Extract the literal string between the parentheses - val literal = partText.substring("IDENTIFIER(".length, partText.length - 1) - // Remove quotes and unescape + // Check if this is an IDENTIFIER('...') literal. + if (partText.startsWith(IDENTIFIER_PREFIX) && partText.endsWith(")")) { + // Extract the literal string between the parentheses. + val literal = partText.substring(IDENTIFIER_PREFIX.length, partText.length - 1) + // Remove quotes and unescape single quotes. val unquoted = if (literal.startsWith("'") && literal.endsWith("'")) { literal.substring(1, literal.length - 1).replace("''", "'") } else { literal } - // Parse as multipart identifier + // Parse as multipart identifier and return the parts. try { CatalystSqlParser.parseMultipartIdentifier(unquoted) } catch { - case _: Exception => Seq(partText) + case _: ParseException => + // If parsing fails, treat the entire text as a single identifier part. + Seq(partText) } } else { - // Regular identifier + // Regular identifier - return as-is. Seq(partText) } }.mkString(".") - parts } def checkDuplicateClauses[T]( @@ -314,19 +320,21 @@ class SqlScriptingLabelContext { */ private def checkLabels( beginLabelCtx: Option[BeginLabelContext], - endLabelCtx: Option[EndLabelContext]) : Unit = { + endLabelCtx: Option[EndLabelContext]): Unit = { (beginLabelCtx, endLabelCtx) match { // Throw an error if labels do not match. 
- case (Some(bl: BeginLabelContext), Some(el: EndLabelContext)) - if ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) - .toLowerCase(Locale.ROOT) != - ParserUtils.getMultipartIdentifierText(el.multipartIdentifier()) - .toLowerCase(Locale.ROOT) => - withOrigin(bl) { - throw SqlScriptingErrors.labelsMismatch( - CurrentOrigin.get, - ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()), - ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) + case (Some(bl: BeginLabelContext), Some(el: EndLabelContext)) => + val beginLabel = ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) + .toLowerCase(Locale.ROOT) + val endLabel = ParserUtils.getMultipartIdentifierText(el.multipartIdentifier()) + .toLowerCase(Locale.ROOT) + if (beginLabel != endLabel) { + withOrigin(bl) { + throw SqlScriptingErrors.labelsMismatch( + CurrentOrigin.get, + ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()), + ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) + } } // Throw an error if label is qualified. case (Some(bl: BeginLabelContext), _) @@ -342,7 +350,8 @@ class SqlScriptingLabelContext { case (None, Some(el: EndLabelContext)) => withOrigin(el) { throw SqlScriptingErrors.endLabelWithoutBeginLabel( - CurrentOrigin.get, ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) + CurrentOrigin.get, + ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) } case _ => } @@ -350,7 +359,8 @@ class SqlScriptingLabelContext { /** Check if the label is defined. */ private def isLabelDefined(beginLabelCtx: Option[BeginLabelContext]): Boolean = { - beginLabelCtx.map(_.multipartIdentifier().getText).isDefined + beginLabelCtx.map(ctx => + ParserUtils.getMultipartIdentifierText(ctx.multipartIdentifier())).isDefined } /** @@ -418,7 +428,7 @@ class SqlScriptingLabelContext { /** * Enter a for loop scope. 
- * If the for loop variable is defined, it will be asserted to not be inside seenLabels; + * If the for loop variable is defined, it will be asserted to not be inside seenLabels. * Then, if the for loop variable is defined, it will be added to seenLabels. */ def enterForScope(identifierCtx: Option[MultipartIdentifierContext]): Unit = { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 048065fde17c..b751244263c6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1533,54 +1533,54 @@ DropTable false, false -- !query -CREATE SCHEMA test_schema +CREATE SCHEMA identifier_clause_test_schema -- !query analysis CreateNamespace false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] -- !query -CREATE TABLE test_schema.test_table(c1 INT) USING CSV +CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`test_schema`.`test_table`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false -- !query -ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS -- !query analysis -AnalyzeTableCommand `spark_catalog`.`test_schema`.`test_table`, false +AnalyzeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false -- !query -REFRESH TABLE IDENTIFIER('test_schema.test_table') +REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -RefreshTableCommand `spark_catalog`.`test_schema`.`test_table` +RefreshTableCommand 
`spark_catalog`.`identifier_clause_test_schema`.`test_table` -- !query -DESCRIBE IDENTIFIER('test_schema.test_table') +DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -DescribeTableCommand `spark_catalog`.`test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, false, [col_name#x, data_type#x, comment#x] -- !query -SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis -ShowColumnsCommand `spark_catalog`.`test_schema`.`test_table`, [col_name#x] +ShowColumnsCommand `spark_catalog`.`identifier_clause_test_schema`.`test_table`, [col_name#x] -- !query -DROP TABLE IDENTIFIER('test_schema.test_table') +DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), test_schema.test_table ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_table -- !query -DROP SCHEMA test_schema +DROP SCHEMA identifier_clause_test_schema -- !query analysis DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [test_schema] ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 8cb82018c654..db7345984716 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -1707,7 +1707,7 @@ struct<> -- !query -CREATE SCHEMA test_schema +CREATE SCHEMA identifier_clause_test_schema -- !query schema struct<> -- !query output @@ -1715,7 +1715,7 @@ struct<> -- !query -CREATE TABLE test_schema.test_table(c1 
INT) USING CSV +CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query schema struct<> -- !query output @@ -1723,7 +1723,7 @@ struct<> -- !query -ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS +ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS -- !query schema struct<> -- !query output @@ -1731,7 +1731,7 @@ struct<> -- !query -REFRESH TABLE IDENTIFIER('test_schema.test_table') +REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct<> -- !query output @@ -1739,7 +1739,7 @@ struct<> -- !query -DESCRIBE IDENTIFIER('test_schema.test_table') +DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct -- !query output @@ -1747,7 +1747,7 @@ c1 int -- !query -SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') +SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct -- !query output @@ -1755,7 +1755,7 @@ c1 -- !query -DROP TABLE IDENTIFIER('test_schema.test_table') +DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table') -- !query schema struct<> -- !query output @@ -1763,7 +1763,7 @@ struct<> -- !query -DROP SCHEMA test_schema +DROP SCHEMA identifier_clause_test_schema -- !query schema struct<> -- !query output From 9c8a5ff3465da9ecfc4605694586d6801e0eb53d Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 17:49:30 -0800 Subject: [PATCH 09/37] Code review improvements: refactor ParserUtils for better clarity and maintainability - Extract IDENTIFIER_PREFIX constant for magic string - Improve getMultipartIdentifierText documentation with complete Scaladoc - Narrow exception handling from Exception to ParseException - Remove redundant ParserUtils prefix in class methods - Fix qualified label validation to check resolved identifiers - Ensure all comments are complete sentences ending with periods - Remove dead code and improve variable naming - Fix FOR 
loop variable resolution to use getMultipartIdentifierText All tests pass: - SqlScriptingParserSuite (qualified label validation) - SqlScriptingE2eSuite (identifier tests) - SQLQueryTestSuite (identifier-clause and identifier-clause-legacy) --- COMPARISON_SUMMARY.md | 79 ++ IDENTIFIER_LITE_DESIGN.md | 264 ++++ IDENTIFIER_LITE_LEGACY_CONFIG.md | 190 +++ IDENTIFIER_LITE_SUMMARY.md | 126 ++ MATCH_RECOGNIZE_SYNTAX.md | 570 ++++++++ MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md | 746 ++++++++++ identifier-clause-comparison-v2.csv | 274 ++++ identifier-clause-comparison-v2.md | 452 ++++++ identifier-clause-comparison.csv | 274 ++++ identifier-clause-comparison.md | 248 ++++ identifier-clause-differences.txt | 364 +++++ .../sql/catalyst/parser/ParserUtils.scala | 24 +- ...xecute-immediate-legacy-identifier.sql.out | 1226 +++++++++++++++++ 13 files changed, 4827 insertions(+), 10 deletions(-) create mode 100644 COMPARISON_SUMMARY.md create mode 100644 IDENTIFIER_LITE_DESIGN.md create mode 100644 IDENTIFIER_LITE_LEGACY_CONFIG.md create mode 100644 IDENTIFIER_LITE_SUMMARY.md create mode 100644 MATCH_RECOGNIZE_SYNTAX.md create mode 100644 MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md create mode 100644 identifier-clause-comparison-v2.csv create mode 100644 identifier-clause-comparison-v2.md create mode 100644 identifier-clause-comparison.csv create mode 100644 identifier-clause-comparison.md create mode 100644 identifier-clause-differences.txt create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out diff --git a/COMPARISON_SUMMARY.md b/COMPARISON_SUMMARY.md new file mode 100644 index 000000000000..bc5f87021b10 --- /dev/null +++ b/COMPARISON_SUMMARY.md @@ -0,0 +1,79 @@ +# Identifier-Lite Implementation: Regression Check Summary + +## Files Generated + +1. **identifier-clause-comparison-v2.csv** - Raw CSV data with all test results +2. **identifier-clause-comparison-v2.md** - Formatted markdown table with analysis +3. 
**COMPARISON_SUMMARY.md** (this file) - Regression check summary + +## Regression Analysis + +### ✅ Result: NO REGRESSIONS FOUND + +Comparing the previous version (v1) with the current version (v2): +- **Total queries compared**: 227 +- **Regressions (was SUCCESS, now error)**: 0 +- **Improvements (was error, now SUCCESS)**: 0 +- **Unchanged**: 227 (100%) + +### Test Statistics + +- **Total Tests**: 227 +- **Tests from Master (baseline)**: 128 +- **New Tests Added**: 99 +- **Tests Changed from Master**: 13 (all improvements) +- **Tests with Legacy Mode Differences**: 47 (20.7%) + +## Master Comparison + +### Tests Changed from Master (13 tests - all improvements): + +1. **Query #114**: `SELECT row_number() OVER IDENTIFIER('x.win')...` + - Master: `PARSE_SYNTAX_ERROR` + - Current: `IDENTIFIER_TOO_MANY_NAME_PARTS` (better error message) + +2. **Query #115**: `SELECT T1.c1 FROM... JOIN... USING (IDENTIFIER('c1'))` + - Master: `PARSE_SYNTAX_ERROR` + - Current: `SUCCESS` ✅ + +3. **Query #117**: `SELECT map('a', 1).IDENTIFIER('a')` + - Master: `PARSE_SYNTAX_ERROR` + - Current: `SUCCESS` ✅ + +4. **Query #118**: `SELECT named_struct('a', 1).IDENTIFIER('a')` + - Master: `PARSE_SYNTAX_ERROR` + - Current: `SUCCESS` ✅ + +5. **Queries #119-123**: Window specs and dereference improvements + - Multiple queries that were failing now work or have better error messages + +6. 
**Queries #126-130**: DDL improvements (CREATE VIEW, CREATE TABLE, INSERT with column lists) + - These now work correctly with identifier-lite + +## Known Issues + +### 🐛 Unfixed Bug: `IDENTIFIER('t').c1` + +**Query**: `SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1)` +**Status**: Still fails with `UNRESOLVED_COLUMN.WITH_SUGGESTION` +**Expected**: Should resolve as table-qualified column reference and return `1` + +**Root Cause**: +- `IDENTIFIER_KW` is in the `nonReserved` keyword list +- Parser matches `IDENTIFIER` as a function name (via `qualifiedName` → `nonReserved`) +- Then treats `('t')` as function arguments +- Result: creates wrong AST structure + +**Investigation**: +- Attempted grammar reordering: broke other tests +- Attempted adding predicates to `functionName`: didn't prevent matching via `qualifiedName` +- Needs AST-level fix or removal of `IDENTIFIER_KW` from `nonReserved` (may have side effects) + +## Conclusion + +✅ **Safe to proceed**: No regressions introduced +✅ **Improvements made**: 13 tests that were broken now work or have better errors +✅ **New functionality**: 99 new tests covering identifier-lite features +⚠️ **One known bug**: `IDENTIFIER('t').c1` case - documented but unfixed + +The implementation is stable and provides significant improvements over master, with one edge case remaining to be fixed in future work. diff --git a/IDENTIFIER_LITE_DESIGN.md b/IDENTIFIER_LITE_DESIGN.md new file mode 100644 index 000000000000..b98cc22fe44a --- /dev/null +++ b/IDENTIFIER_LITE_DESIGN.md @@ -0,0 +1,264 @@ +# Identifier-Lite Feature Design + +## Overview + +The **identifier-lite** feature is a simplified version of the existing `IDENTIFIER` clause in Spark SQL. It allows `IDENTIFIER('string_literal')` to be used anywhere identifiers can appear in SQL statements, with the string literal being folded immediately during the parse phase. 
+ +## Motivation + +The existing `IDENTIFIER` clause in Spark is limited to a narrow set of use cases: +- It only works in specific grammar positions (table references, column references, function names) +- It requires analysis-time resolution via `PlanWithUnresolvedIdentifier` and `ExpressionWithUnresolvedIdentifier` +- It supports full expressions (including parameter markers and concatenation) + +The identifier-lite feature generalizes identifier templating to **all places where identifiers can be used**, while simplifying the implementation by: +- Only accepting string literals (not arbitrary expressions) +- Folding the string literal into an identifier at parse time (not analysis time) +- Working seamlessly with all existing grammar rules that use identifiers + +## Design + +### Grammar Changes + +#### SqlBaseParser.g4 + +Added a new alternative to the `strictIdentifier` grammar rule: + +```antlr +strictIdentifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral + | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier + | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier + ; +``` + +This allows `IDENTIFIER('string')` to appear anywhere a regular identifier can appear, including: +- Table names +- Column names +- Schema/database names +- Function names +- Constraint names +- And any other identifier context + +### Qualified Identifier Support + +The identifier-lite feature supports **qualified identifiers** within the string literal. 
When you write: +- `IDENTIFIER('`catalog`.`schema`')` - this is parsed into multiple parts: `['catalog', 'schema']` +- `IDENTIFIER('schema.table')` - parsed into: `['schema', 'table']` +- `IDENTIFIER('schema').table` - the schema part is parsed, then combined with the literal `table` + +This allows flexible composition of identifiers: +```sql +-- These are all equivalent for table 'catalog.schema.table': +IDENTIFIER('catalog.schema.table') +IDENTIFIER('catalog.schema').table +IDENTIFIER('catalog').schema.table +catalog.IDENTIFIER('schema.table') +catalog.IDENTIFIER('schema').table +``` + +### Parser Implementation Changes + +#### DataTypeAstBuilder.scala + +Added helper methods to handle identifier-lite with qualified identifier support: + +```scala +protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { + ctx match { + case idLitCtx: IdentifierLiteralContext => + // For IDENTIFIER('literal'), extract the string literal value and parse it + val literalValue = string(visitStringLit(idLitCtx.stringLit())) + // Parse the string as a multi-part identifier (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) + CatalystSqlParser.parseMultipartIdentifier(literalValue) + case _ => + // For regular identifiers, just return the text as a single part + Seq(ctx.getText) + } +} + +protected def getIdentifierText(ctx: ParserRuleContext): String = { + getIdentifierParts(ctx).mkString(".") +} +``` + +Updated `visitMultipartIdentifier()` to flatten parts when an identifier-lite contains multiple parts: + +```scala +override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = + ctx.parts.asScala.flatMap { part => + val identifierCtx = part.identifier() + if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { + // Returns Seq with 1+ elements (multiple if qualified) + getIdentifierParts(identifierCtx.strictIdentifier()) + } else { + Seq(part.getText) + } + }.toSeq +``` + +#### AstBuilder.scala + +Updated all methods that 
extract identifier text to use `getIdentifierParts()`: +- `visitIdentifierSeq()` - uses `getIdentifierText()` to keep list items as single strings +- `visitTableIdentifier()` - combines db and table parts from qualified identifiers +- `visitFunctionIdentifier()` - combines db and function parts from qualified identifiers +- `visitColDefinition()` - extracts column names +- Column name extraction in various contexts + +Special handling for `TableIdentifier` and `FunctionIdentifier` to properly combine parts: +```scala +override def visitTableIdentifier(ctx: TableIdentifierContext): TableIdentifier = { + val tableParts = getIdentifierParts(ctx.table.strictIdentifier()) + val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.strictIdentifier())) + val allParts = dbParts.getOrElse(Seq.empty) ++ tableParts + + allParts match { + case Seq(table) => TableIdentifier(table, None) + case parts if parts.size >= 2 => + TableIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) + } +} +``` + +## Key Differences from Full IDENTIFIER Clause + +| Feature | Full IDENTIFIER Clause | Identifier-Lite | +|---------|----------------------|-----------------| +| **Syntax** | `IDENTIFIER(expression)` | `IDENTIFIER('literal')` | +| **Supported Arguments** | Any constant string expression (including parameter markers, variables, concatenation) | Only string literals | +| **Resolution Time** | Analysis phase (via `PlanWithUnresolvedIdentifier`) | Parse phase (immediately folded) | +| **Grammar Positions** | Limited to specific rules (`identifierReference`, `functionName`) | All positions where identifiers are used | +| **Use Case** | Dynamic identifier resolution with runtime values | Static identifier specification with unusual names | + +## Usage Examples + +### Table Names + +```sql +-- Create table with identifier-lite +CREATE TABLE IDENTIFIER('my_table') (c1 INT); + +-- Query table +SELECT * FROM IDENTIFIER('my_table'); + +-- Qualified table name (fully specified) +SELECT 
* FROM IDENTIFIER('schema.table'); + +-- Qualified table name (partial specification) +SELECT * FROM IDENTIFIER('schema').table; + +-- Qualified with backticks +SELECT * FROM IDENTIFIER('`my schema`.`my table`'); +``` + +### Column Names + +```sql +-- Select specific columns +SELECT IDENTIFIER('col1'), IDENTIFIER('col2') FROM t; + +-- Column with special characters +SELECT IDENTIFIER('`column with spaces`') FROM t; + +-- Mixed usage +CREATE TABLE t(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING); + +-- Qualified column references +SELECT IDENTIFIER('t.col1') FROM t; +``` + +### Function Names + +```sql +-- Use identifier-lite for function names +SELECT IDENTIFIER('abs')(-5); +SELECT IDENTIFIER('upper')('hello'); + +-- Qualified function names +SELECT IDENTIFIER('schema.my_udf')(value) FROM t; +``` + +### DDL Operations + +```sql +-- ALTER TABLE operations +ALTER TABLE IDENTIFIER('table_name') ADD COLUMN IDENTIFIER('new_col') INT; +ALTER TABLE IDENTIFIER('table_name') RENAME COLUMN IDENTIFIER('old') TO IDENTIFIER('new'); + +-- DROP operations with qualified names +DROP TABLE IDENTIFIER('schema.table_name'); + +-- Mixed qualification +DROP TABLE IDENTIFIER('schema').table_name; +``` + +### Complex Qualified Identifier Examples + +```sql +-- Three-part identifier (catalog.schema.table) +SELECT * FROM IDENTIFIER('catalog.schema.table'); + +-- Equivalent forms: +SELECT * FROM IDENTIFIER('catalog.schema').table; +SELECT * FROM IDENTIFIER('catalog').schema.table; +SELECT * FROM catalog.IDENTIFIER('schema.table'); +SELECT * FROM catalog.IDENTIFIER('schema').table; + +-- With backticked parts +SELECT * FROM IDENTIFIER('`my catalog`.`my schema`.`my table`'); + +-- Mixed backticks and regular identifiers +SELECT * FROM IDENTIFIER('`my catalog`.`my schema`').regular_table; +``` + +## Implementation Files Modified + +1. 
**Grammar Files**: + - `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4` + - `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4` (no changes - reusing existing `IDENTIFIER_KW`) + +2. **Parser Implementation**: + - `sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala` + - `sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala` + +3. **Test Files**: + - `sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql` (merged identifier-lite tests) + - `sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala` (added `IdentifierLiteSuite`) + +## Limitations + +1. **No Expression Support**: Identifier-lite only accepts string literals. Expressions like `IDENTIFIER('tab' || '_name')` or parameter markers like `IDENTIFIER(:param)` are not supported. For these use cases, the full IDENTIFIER clause should be used instead. + +2. **No Runtime Binding**: Since the identifier is folded at parse time, it cannot be changed dynamically. For dynamic identifier binding, use the full IDENTIFIER clause with parameter markers or variables. + +3. **String Literal Only**: The argument must be a string literal (`'value'` or `"value"`). Variables, parameter markers, and expressions are not supported. + +## Testing + +Test coverage includes: +1. Basic usage with table names, column names, and function names +2. Qualified identifiers (e.g., `schema.table`, `catalog.schema.table`) +3. Identifiers with special characters (backticked identifiers) +4. DDL operations (CREATE, ALTER, DROP) +5. Mixed usage with regular identifiers +6. Column definitions using identifier-lite +7. ALTER TABLE operations with identifier-lite (RENAME COLUMN, ADD COLUMN, DROP COLUMN, RENAME TABLE) +8. 
**Qualified table references with identifier-lite:** + - `IDENTIFIER('schema.table')` - fully qualified in one literal + - `IDENTIFIER('schema').table` - partial qualification + - `IDENTIFIER('`schema`.`table`')` - with backticks + - Mixed forms with both identifier-lite and regular identifiers + +All identifier-lite tests have been integrated into the existing `identifier-clause.sql` test suite under a dedicated section, making it easy to see the distinction between: +- Full IDENTIFIER clause tests (using expressions, concatenation, variables) +- Identifier-lite tests (using only string literals) + +## Future Enhancements + +Potential future improvements: +1. Better error messages when users try to use expressions instead of literals +2. Support for identifier-lite in additional contexts (e.g., constraint names, index names) +3. Documentation updates in the SQL reference guide diff --git a/IDENTIFIER_LITE_LEGACY_CONFIG.md b/IDENTIFIER_LITE_LEGACY_CONFIG.md new file mode 100644 index 000000000000..a8c817451839 --- /dev/null +++ b/IDENTIFIER_LITE_LEGACY_CONFIG.md @@ -0,0 +1,190 @@ +# Legacy Configuration for IDENTIFIER Clause + +## Overview + +The identifier-lite feature introduces `IDENTIFIER('literal')` syntax that resolves string literals to identifiers at parse time. To maintain backward compatibility with the legacy `IDENTIFIER(expression)` behavior, a configuration option is provided. 
+ +## Configuration + +### `spark.sql.legacy.identifierClause` + +- **Type**: Boolean +- **Default**: `false` (identifier-lite enabled) +- **Internal**: Yes +- **Since**: 4.1.0 + +### Behavior + +#### Default Behavior (`false`) +When `spark.sql.legacy.identifierClause = false` (default): +- **NEW**: `IDENTIFIER('literal')` is resolved at parse time to the identifier `literal` +- **LEGACY**: `IDENTIFIER(expression)` still works for dynamic table/schema references + +Examples: +```sql +-- Identifier-lite: Resolved at parse time +SELECT IDENTIFIER('col1') FROM t; -- Same as: SELECT col1 FROM t + +-- Parameter markers work with identifier-lite +SELECT IDENTIFIER(:param) FROM t; -- If :param = 'col1', same as SELECT col1 FROM t + +-- String coalescing works with identifier-lite +SELECT IDENTIFIER('col' '1') FROM t; -- Same as: SELECT col1 FROM t + +-- Legacy IDENTIFIER clause still works +DECLARE table_name = 'my_table'; +SELECT * FROM IDENTIFIER(table_name); -- Evaluated at analysis time +``` + +#### Legacy-Only Behavior (`true`) +When `spark.sql.legacy.identifierClause = true`: +- **DISABLED**: `IDENTIFIER('literal')` is NOT allowed +- **LEGACY ONLY**: Only `IDENTIFIER(expression)` is allowed + +Examples: +```sql +SET spark.sql.legacy.identifierClause = true; + +-- This will FAIL with parse error +SELECT IDENTIFIER('col1') FROM t; + +-- Only the legacy dynamic form works +DECLARE table_name = 'my_table'; +SELECT * FROM IDENTIFIER(table_name); -- Works +``` + +## Implementation Details + +### Grammar Rule Guards + +The identifier-lite alternatives are guarded by `{!legacy_identifier_clause_only}?` predicates: + +```antlr +strictIdentifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral + | ... 
+ ; + +errorCapturingIdentifier + : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase + | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra + ; +``` + +### Parser Precedence + +The `identifierReference` rule is ordered to prioritize the legacy syntax: + +```antlr +identifierReference + : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN // Legacy: try first + | multipartIdentifier // Identifier-lite: try second + ; +``` + +This ensures that when identifier-lite is enabled, the parser: +1. First tries to match the legacy `IDENTIFIER(expression)` syntax +2. Only if that fails (e.g., because it's a string literal), falls back to matching identifier-lite through `multipartIdentifier` + +### Configuration Flow + +1. **SQLConf.scala**: Defines `LEGACY_IDENTIFIER_CLAUSE_ONLY` config +2. **SqlApiConf.scala**: Trait method `def legacyIdentifierClauseOnly: Boolean` +3. **SQLConf.scala**: Implementation `getConf(LEGACY_IDENTIFIER_CLAUSE_ONLY)` +4. **parsers.scala**: Sets parser boolean: `parser.legacy_identifier_clause_only = conf.legacyIdentifierClauseOnly` +5. **SqlBaseParser.g4**: Grammar predicates check `{!legacy_identifier_clause_only}?` + +## Use Cases + +### When to Use Legacy Mode (`true`) + +1. **Backward Compatibility**: Existing applications that rely exclusively on the legacy `IDENTIFIER(expression)` behavior +2. **Migration Period**: Temporarily disable identifier-lite while migrating code +3. 
**Testing**: Verify that code doesn't accidentally use identifier-lite syntax + +### Recommended Settings + +- **New Applications**: Keep default (`false`) to use identifier-lite +- **Existing Applications**: Test with default (`false`); use legacy mode (`true`) only if needed +- **Production**: Use default (`false`) for maximum flexibility + +## Examples + +### Complete Example: Both Modes + +```sql +-- Default mode (identifier-lite enabled) +SET spark.sql.legacy.identifierClause = false; + +CREATE TABLE my_table(col1 INT, col2 STRING); + +-- Identifier-lite works +SELECT IDENTIFIER('col1') FROM my_table; -- Returns col1 values + +-- Legacy still works +DECLARE tab_name = 'my_table'; +SELECT * FROM IDENTIFIER(tab_name); -- Returns all rows + +-- With parameters +SELECT IDENTIFIER(:col) FROM IDENTIFIER(:tab) USING 'col1' AS col, 'my_table' AS tab; + +--- + +-- Legacy mode (identifier-lite disabled) +SET spark.sql.legacy.identifierClause = true; + +-- Identifier-lite FAILS +SELECT IDENTIFIER('col1') FROM my_table; -- PARSE ERROR + +-- Legacy still works +DECLARE tab_name = 'my_table'; +SELECT * FROM IDENTIFIER(tab_name); -- Returns all rows +``` + +## Testing + +The legacy behavior is tested in: +- `SQLViewTestSuite` - Test "SPARK-51552: Temporary variables under identifiers are not allowed in persisted view" + - Verifies that legacy `IDENTIFIER(variable)` correctly evaluates at analysis time + - Ensures proper error messages when temporary objects are referenced in persisted views + +## Related Configurations + +- `spark.sql.legacy.parameterSubstitution.constantsOnly`: Controls where parameter markers are allowed +- `spark.sql.legacy.setopsPrecedence.enabled`: Controls set operation precedence +- Both follow the same pattern of using grammar predicates for conditional syntax + +## Migration Guide + +### From Legacy to Identifier-Lite + +1. **Audit Code**: Find all uses of `IDENTIFIER(expression)` where `expression` is a variable +2. 
**Replace with String Literals**: + ```sql + -- Before (legacy) + DECLARE col_name = 'my_col'; + SELECT IDENTIFIER(col_name) FROM t; + + -- After (identifier-lite with parameters) + SELECT IDENTIFIER(:col) FROM t USING 'my_col' AS col; + ``` +3. **Test**: Verify all queries work with default config +4. **Deploy**: Use default `spark.sql.legacy.identifierClause = false` + +### If You Must Stay on Legacy + +Set the configuration globally or per-session: +```sql +-- Spark SQL +SET spark.sql.legacy.identifierClause = true; + +-- Spark properties file +spark.sql.legacy.identifierClause=true + +-- SparkSession builder +spark.conf.set("spark.sql.legacy.identifierClause", "true") +``` + + diff --git a/IDENTIFIER_LITE_SUMMARY.md b/IDENTIFIER_LITE_SUMMARY.md new file mode 100644 index 000000000000..e3c26f39d2bb --- /dev/null +++ b/IDENTIFIER_LITE_SUMMARY.md @@ -0,0 +1,126 @@ +# Identifier-Lite Implementation Summary + +## Completed Tasks + +✅ **Grammar Changes** +- Modified `SqlBaseParser.g4` to add `IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral` as a new alternative in `strictIdentifier` +- No changes needed to `SqlBaseLexer.g4` - reused existing `IDENTIFIER_KW` token + +✅ **Parser Implementation** +- Added `getIdentifierText()` helper method in `DataTypeAstBuilder.scala` to extract identifier text from both regular identifiers and identifier-lite syntax +- Updated `visitMultipartIdentifier()` in `DataTypeAstBuilder.scala` to handle identifier-lite +- Updated `AstBuilder.scala` methods: + - `visitIdentifierSeq()` + - `visitTableIdentifier()` + - `visitFunctionIdentifier()` + - `visitColDefinition()` + - Column name extraction in `visitHiveChangeColumn()` + - Column name extraction in `visitColType()` (DataTypeAstBuilder) + +✅ **Test Coverage** +- Merged identifier-lite tests into existing `identifier-clause.sql` test suite +- Added dedicated section for identifier-lite tests with clear comments distinguishing them from full IDENTIFIER clause tests +- 
Created `IdentifierLiteSuite` class in `ParametersSuite.scala` with unit tests +- Test coverage includes: + - Column definitions with identifier-lite + - ALTER TABLE operations (RENAME COLUMN, ADD COLUMN, DROP COLUMN, RENAME TABLE) + - Qualified table references + - Function names + - Mixed usage scenarios + +✅ **Documentation** +- Created `IDENTIFIER_LITE_DESIGN.md` with comprehensive design documentation + +## Key Features + +### What Works Now + +The identifier-lite feature allows `IDENTIFIER('string_literal')` to be used in **all** positions where identifiers can appear: + +1. **Table Names** + ```sql + CREATE TABLE IDENTIFIER('my_table') (c1 INT); + SELECT * FROM IDENTIFIER('schema.table'); + ``` + +2. **Column Names** (including in column definitions) + ```sql + CREATE TABLE t(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING); + SELECT IDENTIFIER('col1') FROM t; + ``` + +3. **Function Names** + ```sql + SELECT IDENTIFIER('abs')(-5); + ``` + +4. **Schema Names** + ```sql + CREATE SCHEMA IDENTIFIER('my_schema'); + USE IDENTIFIER('my_schema'); + ``` + +5. **ALTER TABLE Operations** + ```sql + ALTER TABLE IDENTIFIER('t') RENAME COLUMN IDENTIFIER('old') TO IDENTIFIER('new'); + ALTER TABLE IDENTIFIER('t') ADD COLUMN IDENTIFIER('col') INT; + ``` + +6. **Qualified Identifiers** + ```sql + SELECT * FROM IDENTIFIER('schema.table.column'); + ``` + +### Key Differences from Full IDENTIFIER Clause + +| Aspect | Full IDENTIFIER | Identifier-Lite | +|--------|----------------|-----------------| +| **Syntax** | `IDENTIFIER(expr)` | `IDENTIFIER('literal')` | +| **Arguments** | Any constant expression | String literals only | +| **Resolution** | Analysis phase | Parse phase (immediate) | +| **Grammar Scope** | Limited positions | All identifier positions | + +## Files Modified + +1. `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4` +2. `sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala` +3. 
`sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala` +4. `sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql` +5. `sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala` + +## Next Steps + +To complete the implementation: + +1. **Build & Test**: Run the full test suite to ensure all tests pass + ```bash + build/mvn clean test -pl sql/catalyst,sql/api,sql/core + ``` + +2. **Generate Parser**: ANTLR needs to regenerate parser classes from the grammar + ```bash + build/mvn clean compile -pl sql/api + ``` + +3. **Run Specific Tests**: + ```bash + build/mvn test -pl sql/core -Dtest=IdentifierLiteSuite + build/sbt "sql/testOnly *identifier-clause*" + ``` + +4. **Update Documentation**: Consider adding user-facing documentation to `docs/sql-ref-identifier-clause.md` + +## Design Decisions + +1. **Reused IDENTIFIER keyword**: No new keyword needed; distinction is based on argument type (literal vs expression) and resolution time +2. **Parse-time folding**: String literals are resolved immediately during parsing for simplicity and universal applicability +3. **Universal applicability**: Works in all identifier positions without special grammar rules +4. **Clean separation**: Tests clearly distinguish identifier-lite (literals) from full IDENTIFIER (expressions) + +## Benefits + +1. **Simplicity**: Parse-time folding is simpler than analysis-time resolution +2. **Universality**: Works everywhere identifiers are used, no special cases +3. **Backward Compatible**: Existing IDENTIFIER clause with expressions continues to work +4. 
**Clear Semantics**: String literal-only restriction makes behavior predictable + diff --git a/MATCH_RECOGNIZE_SYNTAX.md b/MATCH_RECOGNIZE_SYNTAX.md new file mode 100644 index 000000000000..a16dee1907e4 --- /dev/null +++ b/MATCH_RECOGNIZE_SYNTAX.md @@ -0,0 +1,570 @@ +# MATCH_RECOGNIZE Clause - Syntax Description + +## Overview + +The `MATCH_RECOGNIZE` clause is a powerful SQL feature for performing pattern recognition and sequence analysis over ordered sets of rows. It enables detection of patterns in time-series data, event sequences, and other ordered datasets using a regular expression-like syntax. + +## General Syntax + +```sql +MATCH_RECOGNIZE ( + [ PARTITION BY partition_expression [, ...] ] + ORDER BY order_expression [ ASC | DESC ] [, ...] + MEASURES measure_expression AS alias [, ...] + [ ONE ROW PER MATCH | ALL ROWS PER MATCH ] + [ AFTER MATCH skip_clause ] + PATTERN ( pattern_expression ) + [ SUBSET subset_definition [, ...] ] + DEFINE pattern_variable AS condition [, ...] +) +``` + +## Clause Components + +### 1. PARTITION BY (Optional) + +**Purpose**: Divides the input data into independent partitions for parallel pattern matching. + +**Syntax**: +```sql +PARTITION BY column_expression [, column_expression ...] +``` + +**Examples from corpus**: +```sql +PARTITION BY match_0_0 +PARTITION BY accountRegion +PARTITION BY field_name +``` + +**Notes**: +- Pattern matching is performed independently within each partition +- Similar to window function partitioning +- Can be omitted for global pattern matching across all rows +- Supports single or multiple partitioning columns + +### 2. ORDER BY (Required) + +**Purpose**: Specifies the order of rows within each partition for pattern evaluation. + +**Syntax**: +```sql +ORDER BY column_expression [ ASC | DESC ] [, ...] 
+``` + +**Examples from corpus**: +```sql +ORDER BY p_event_time ASC +``` + +**Notes**: +- **REQUIRED** - Pattern matching depends on row ordering +- Typically orders by timestamp for temporal pattern detection +- Supports ASC (ascending) or DESC (descending) ordering +- Can specify multiple ordering columns + +### 3. MEASURES + +**Purpose**: Defines computed values to be returned for each pattern match. + +**Syntax**: +```sql +MEASURES + expression AS alias [, + expression AS alias ...] +``` + +**Common Functions Used in MEASURES**: + +| Function | Description | Example | +|----------|-------------|---------| +| `MATCH_NUMBER()` | Returns a unique identifier for each match | `MATCH_NUMBER() AS match_number` | +| `FIRST(column)` | Returns value from first row of the match | `FIRST(p_event_time) AS start_time` | +| `LAST(column)` | Returns value from last row of the match | `LAST(p_event_time) AS end_time` | +| `COUNT(pattern.*)` | Counts rows matching a specific pattern variable | `COUNT(pattern_Login.*) AS num_logins` | + +**Examples from corpus**: +```sql +MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, + COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances +``` + +**Notes**: +- Can reference pattern variables using dot notation (e.g., `pattern_variable.*`) +- Supports aggregate functions and row-level functions +- Column references without qualifiers refer to the entire match + +### 4. Output Mode + +**Purpose**: Determines how many rows are returned per match. 
+ +**Options**: + +#### ONE ROW PER MATCH +- Returns a single summary row for each pattern match +- Contains only MEASURES values +- **Default behavior** (if not specified) + +#### ALL ROWS PER MATCH +- Returns all rows that participated in the match +- Each row includes the MEASURES values +- Useful for detailed analysis of matched sequences + +**Examples from corpus**: +```sql +ALL ROWS PER MATCH +``` + +**Note**: All examples in the corpus use `ALL ROWS PER MATCH`. + +### 5. AFTER MATCH (Optional) + +**Purpose**: Specifies where to resume pattern matching after a match is found. + +**Syntax**: +```sql +AFTER MATCH skip_strategy +``` + +**Skip Strategies**: + +| Strategy | Description | When to Use | +|----------|-------------|-------------| +| `SKIP PAST LAST ROW` | Resume after the last row of the current match | Non-overlapping matches (most common) | +| `SKIP TO NEXT ROW` | Resume from the row after the first row of the match | Overlapping matches allowed | +| `SKIP TO FIRST pattern_variable` | Resume at the first row of the specified pattern variable | Complex overlapping scenarios | +| `SKIP TO LAST pattern_variable` | Resume at the last row of the specified pattern variable | Complex overlapping scenarios | + +**Examples from corpus**: +```sql +AFTER MATCH SKIP PAST LAST ROW +``` + +**Note**: All examples in the corpus use `SKIP PAST LAST ROW`, which is the most common strategy for detecting distinct, non-overlapping sequences. + +### 6. PATTERN (Required) + +**Purpose**: Defines the sequence pattern to match using regular expression-like syntax. 
+ +**Syntax**: +```sql +PATTERN ( pattern_expression ) +``` + +**Pattern Quantifiers**: + +| Quantifier | Description | Example | +|------------|-------------|---------| +| `{n}` | Exactly n occurrences | `A{3}` - exactly 3 A's | +| `{n,}` | At least n occurrences | `A{1,}` - one or more A's | +| `{n,m}` | Between n and m occurrences | `A{2,5}` - 2 to 5 A's | +| `{0,0}` | Zero occurrences (used with PERMUTE for "absence" detection) | `A{0,0}` - no A's | +| `+` | One or more (equivalent to `{1,}`) | `A+` - one or more A's | +| `*` | Zero or more | `A*` - zero or more A's | +| `?` | Zero or one | `A?` - optional A | + +**Pattern Operators**: + +| Operator | Description | Example | +|----------|-------------|---------| +| Space (concatenation) | Sequential pattern | `A B C` - A followed by B followed by C | +| `\|` (alternation) | Either pattern | `A \| B` - either A or B | +| `()` (grouping) | Groups sub-patterns | `(A B)+` - one or more A-B sequences | + +**Special Pattern Functions**: + +#### PERMUTE +**Purpose**: Matches pattern variables in any order (not necessarily sequential). + +**Syntax**: +```sql +PATTERN ( PERMUTE(pattern_var1{n1,m1}, pattern_var2{n2,m2}, ...) 
) +``` + +**Examples from corpus**: +```sql +-- Any order of 4 different patterns +PATTERN (PERMUTE( + pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled{1,}, + pattern_AWS_CloudTrail_SES_CheckSendQuota{1,}, + pattern_AWS_CloudTrail_SES_ListIdentities{1,}, + pattern_AWS_CloudTrail_SES_CheckIdentityVerifications{1,} +)) + +-- Detect presence of A but absence of B (within time window) +PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) +``` + +**Standard Sequential Patterns**: + +```sql +-- Simple sequence: A followed by B +PATTERN (pattern_A{1,} pattern_B{1,}) + +-- Complex sequence: A followed by B followed by C +PATTERN (pattern_A{1,} pattern_B{1,} pattern_C{1,}) + +-- Three-step sequence with minimum occurrences +PATTERN (pattern_TempStageCreated{1,} pattern_CopyIntoStage{1,} pattern_FileDownloaded{1,}) + +-- Sequence with specific count requirement +PATTERN (pattern_BruteForce{5,} pattern_LoginSuccess{1,}) +``` + +### 7. SUBSET (Optional) + +**Purpose**: Creates a union of multiple pattern variables under a single alias. + +**Syntax**: +```sql +SUBSET subset_name = (pattern_var1, pattern_var2, ...) +``` + +**Use Cases**: +- Grouping related pattern variables for aggregate functions +- Simplifying DEFINE conditions that apply to multiple variables +- Creating logical groups of events + +**Note**: Not commonly used in the corpus examples, but supported in the standard. + +### 8. DEFINE (Required) + +**Purpose**: Specifies the conditions that rows must satisfy to be classified as each pattern variable. + +**Syntax**: +```sql +DEFINE + pattern_variable AS condition [, + pattern_variable AS condition ...] +``` + +**Common Condition Patterns**: + +#### Simple Equality Conditions +```sql +pattern_AWS_Console_Login AS p_rule_id = 'AWS.Console.Login' +pattern_Okta_Login AS p_rule_id = 'Okta.Login.Success' +``` + +#### Time-Based Constraints with LAG + +**Purpose**: Ensure events occur within a specified time window. 
+ +```sql +-- Pattern must occur within N minutes of previous event +pattern_Variable AS p_rule_id = 'Rule.ID' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) +``` + +**Common time windows from corpus**: +- 15 minutes: Quick succession events +- 30 minutes: Related security events +- 60 minutes: Related workflow events +- 90 minutes: Extended workflow patterns +- 120 minutes: Long-running processes +- 720 minutes (12 hours): Extended persistence patterns + +#### Negative Time Constraints with LAG + +**Purpose**: Ensure a preceding event did NOT occur within a time window. + +```sql +-- Match if previous event was different OR happened too long ago +pattern_Variable AS p_rule_id = 'Current.Rule' + AND (LAG(p_rule_id, 1, '') != 'Previous.Rule' + OR (LAG(p_rule_id, 1, '') = 'Previous.Rule' + AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 15)) +``` + +#### Negative Time Constraints with LEAD + +**Purpose**: Ensure a following event did NOT occur within a time window. 
+ +```sql +-- Match if next event is different OR happens too far in future +pattern_Variable AS p_rule_id = 'Current.Rule' + AND (LEAD(p_rule_id, 1, '') != 'Next.Rule' + OR (LEAD(p_rule_id, 1, '') = 'Next.Rule' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) +``` + +**Navigation Functions in DEFINE**: + +| Function | Description | Example Use Case | +|----------|-------------|------------------| +| `LAG(column, offset, default)` | Access preceding row value | Time gap from previous event | +| `LEAD(column, offset, default)` | Access following row value | Time gap to next event | +| `PREV(column)` | Previous row (shorthand for LAG) | Price comparison | +| `FIRST(column)` | First row in match so far | Compare to starting value | +| `LAST(column)` | Last row in match so far | Compare to ending value | + +**Complex Condition Example**: +```sql +DEFINE + pattern_AWS_EC2_Startup_Script_Change AS + p_rule_id = 'AWS.EC2.Startup.Script.Change' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90), + pattern_AWS_EC2_StopInstances AS + p_rule_id = 'AWS.EC2.StopInstances' +``` + +## Common Pattern Examples + +### 1. Sequential Event Detection + +**Use Case**: Detect A followed by B within a time window. + +```sql +MATCH_RECOGNIZE ( + PARTITION BY user_id + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(event_time) AS start_time, + LAST(event_time) AS end_time + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_A{1,} pattern_B{1,}) + DEFINE + pattern_A AS event_type = 'TypeA', + pattern_B AS event_type = 'TypeB' + AND (LAG(event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 60) +) +``` + +### 2. Three-Step Sequential Pattern + +**Use Case**: Detect multi-stage attack or workflow (A → B → C). 
+ +```sql +MATCH_RECOGNIZE ( + PARTITION BY entity_id + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(event_time) AS start_time, + LAST(event_time) AS end_time, + COUNT(pattern_A.*) AS num_a, + COUNT(pattern_B.*) AS num_b, + COUNT(pattern_C.*) AS num_c + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_A{1,} pattern_B{1,} pattern_C{1,}) + DEFINE + pattern_A AS event_type = 'TypeA', + pattern_B AS event_type = 'TypeB' + AND (LAG(event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 15), + pattern_C AS event_type = 'TypeC' + AND (LAG(event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 15) +) +``` + +### 3. Absence Detection (Negative Pattern) + +**Use Case**: Detect event A without subsequent event B within time window. + +```sql +MATCH_RECOGNIZE ( + PARTITION BY entity_id + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + COUNT(pattern_A.*) AS num_a + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_A{1,}) + DEFINE + pattern_A AS event_type = 'TypeA' + AND (LEAD(event_type, 1, '') != 'TypeB' + OR (LEAD(event_type, 1, '') = 'TypeB' + AND ABS(DATEDIFF(MINS, LEAD(event_time), event_time)) > 60)) +) +``` + +### 4. Unordered Pattern Matching (PERMUTE) + +**Use Case**: Detect all of multiple events in any order. + +```sql +MATCH_RECOGNIZE ( + PARTITION BY account_region + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + COUNT(pattern_A.*) AS num_a, + COUNT(pattern_B.*) AS num_b, + COUNT(pattern_C.*) AS num_c + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (PERMUTE(pattern_A{1,}, pattern_B{1,}, pattern_C{1,})) + DEFINE + pattern_A AS event_type = 'TypeA', + pattern_B AS event_type = 'TypeB', + pattern_C AS event_type = 'TypeC' +) +``` + +### 5. Threshold-Based Pattern + +**Use Case**: Detect N failures followed by success (e.g., brute force). 
+ +```sql +MATCH_RECOGNIZE ( + PARTITION BY ip_address + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(event_time) AS start_time, + LAST(event_time) AS end_time, + COUNT(pattern_Failure.*) AS num_failures, + COUNT(pattern_Success.*) AS num_successes + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_Failure{5,} pattern_Success{1,}) + DEFINE + pattern_Failure AS event_type = 'LoginFailure', + pattern_Success AS event_type = 'LoginSuccess' + AND (LAG(event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 30) +) +``` + +### 6. Global Pattern (No Partitioning) + +**Use Case**: Match patterns across entire dataset. + +```sql +MATCH_RECOGNIZE ( + ORDER BY event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(event_time) AS start_time, + LAST(event_time) AS end_time + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_A{1,}) + DEFINE + pattern_A AS event_type = 'TargetEvent' + AND (LEAD(event_type, 1, '') != 'FollowUp' + OR (LEAD(event_type, 1, '') = 'FollowUp' + AND ABS(DATEDIFF(MINS, LEAD(event_time), event_time)) > 60)) +) +``` + +## Key Design Patterns from Corpus + +### 1. Time Window Constraints + +Most security and event correlation use cases require events to occur within specific time windows: + +```sql +-- Within N minutes of previous event +AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) +``` + +### 2. Negative Pattern Detection + +Detecting what DIDN'T happen is crucial for security anomaly detection: + +```sql +-- Event A without event B within time window +AND (LEAD(p_rule_id, 1, '') != 'Expected.Event' + OR (LEAD(p_rule_id, 1, '') = 'Expected.Event' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > threshold)) +``` + +### 3. 
Comprehensive Measures + +Security and audit applications typically capture: +- Match identifier: `MATCH_NUMBER()` +- Temporal bounds: `FIRST(p_event_time)`, `LAST(p_event_time)` +- Event counts: `COUNT(pattern_variable.*)` for each pattern variable + +### 4. Consistent Naming Convention + +Pattern variables follow clear naming: `pattern_<RuleName>` +- Example: `pattern_AWS_IAM_CreateUser`, `pattern_Okta_Login_Success` + +## Implementation Notes + +### Typical Use Cases + +1. **Security Event Correlation**: Detect multi-stage attacks, privilege escalation, account compromise +2. **Fraud Detection**: Identify suspicious transaction sequences +3. **Workflow Monitoring**: Track multi-step processes and detect anomalies +4. **SLA Monitoring**: Detect missing or delayed steps in expected sequences +5. **Behavioral Analytics**: Identify unusual patterns in user behavior + +### Performance Considerations + +1. **Partitioning**: Proper partitioning is critical for performance and correctness + - Partition by entity (user, account, IP address, etc.) + - Ensures pattern matching within related event streams + +2. **Ordering**: Always order by timestamp for temporal patterns + - Use ASC for forward-looking patterns + - Critical for LAG/LEAD correctness + +3. **Time Windows**: Use DATEDIFF constraints to limit pattern search scope + - Prevents matching across unrelated time periods + - Improves query performance + +### Best Practices + +1. **Always specify PARTITION BY** unless truly global pattern matching is needed +2. **Use ALL ROWS PER MATCH** for detailed forensics and debugging +3. **Include MATCH_NUMBER()** in MEASURES for unique match identification +4. **Capture start/end times** using FIRST() and LAST() functions +5. **Count each pattern variable** to understand match composition +6. **Use SKIP PAST LAST ROW** for non-overlapping matches (most common) +7. **Apply time window constraints** in DEFINE to ensure temporal relevance +8.
**Use NULL checks with LAG/LEAD** to handle first/last rows in partition +9. **Use ABS(DATEDIFF(...))** for bidirectional time comparisons + +## SQL Dialect Notes + +The examples in this corpus appear to be written for **Snowflake SQL** dialect, evidenced by: + +- `DATEDIFF(MINS, ...)` function with MINS as first parameter +- `LATERAL FLATTEN` for JSON array expansion +- Snowflake-specific table references and system functions +- `p_occurs_since()` custom function for time filtering + +Key functions used: +- `DATEDIFF(unit, start, end)`: Calculate time difference +- `LAG(column, offset, default)`: Access previous row +- `LEAD(column, offset, default)`: Access next row +- `ABS()`: Absolute value +- `COALESCE()`: Return first non-null value +- `GET_PATH()`: Extract JSON path value + +## Conclusion + +The MATCH_RECOGNIZE clause provides a powerful, declarative way to perform pattern matching on ordered datasets. The syntax combines SQL's familiar structure with regular expression-like pattern matching, making it particularly effective for: + +- Temporal sequence analysis +- Security event correlation +- Fraud detection +- Process mining +- Behavioral analytics + +The key to effective use is: +1. Proper partitioning by entity +2. Correct temporal ordering +3. Well-defined pattern variables with time constraints +4. Comprehensive measures for analysis +5. Appropriate skip strategy for match handling + + + + diff --git a/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md b/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md new file mode 100644 index 000000000000..a5f157ee4ddc --- /dev/null +++ b/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md @@ -0,0 +1,746 @@ +# MATCH_RECOGNIZE Clause - Syntax Observed in Examples + +This document describes **only** the MATCH_RECOGNIZE syntax patterns actually present in the provided spreadsheet examples. No additional SQL standard features are included. 
+ +## Overall Structure Observed + +Every MATCH_RECOGNIZE clause in the examples follows this structure: + +```sql +FROM table_name +MATCH_RECOGNIZE ( + [ PARTITION BY column ] + ORDER BY column ASC + MEASURES + measure_expression AS alias, + ... + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN ( pattern_expression ) + DEFINE + pattern_variable AS condition, + ... +) +``` + +## Clause Usage in Examples + +### Clauses Present in ALL Examples (18 out of 18) + +1. ✅ **ORDER BY** - Present in all 18 examples +2. ✅ **MEASURES** - Present in all 18 examples +3. ✅ **ALL ROWS PER MATCH** - Present in all 18 examples +4. ✅ **AFTER MATCH SKIP PAST LAST ROW** - Present in all 18 examples +5. ✅ **PATTERN** - Present in all 18 examples (required) +6. ✅ **DEFINE** - Present in all 18 examples (required) + +### Clauses Present in MOST Examples + +7. ✅ **PARTITION BY** - Present in 17 out of 18 examples + - Missing in: `secret_exposed_and_not_quarantined.yml` + +### Clauses NEVER Used in Examples + +- ❌ **ONE ROW PER MATCH** - Never used (all use ALL ROWS PER MATCH) +- ❌ **SUBSET** - Never used in any example + +## Detailed Clause Breakdown + +### 1. PARTITION BY + +**Usage**: 17 out of 18 examples use PARTITION BY + +**Observed Syntax**: +```sql +PARTITION BY single_column +``` + +**Examples from spreadsheet**: +```sql +PARTITION BY match_0_0 +PARTITION BY accountRegion +PARTITION BY field_name +``` + +**Notes**: +- Always partitions by exactly ONE column +- Never uses multiple columns +- One example omits PARTITION BY entirely (global matching) + +### 2. ORDER BY + +**Usage**: 18 out of 18 examples use ORDER BY + +**Observed Syntax**: +```sql +ORDER BY column ASC +``` + +**Examples from spreadsheet**: +```sql +ORDER BY p_event_time ASC +``` + +**Notes**: +- Always orders by exactly ONE column (always `p_event_time`) +- Always uses ASC (ascending) +- Never uses DESC +- Never uses multiple columns + +### 3. 
MEASURES + +**Usage**: 18 out of 18 examples use MEASURES + +**Observed Functions**: + +All examples use exactly this pattern: + +```sql +MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_variable_name.*) AS num_pattern_variable_name, + COUNT(pattern_variable_name2.*) AS num_pattern_variable_name2, + ... +``` + +**Functions Observed**: +1. `MATCH_NUMBER()` - Used in all 18 examples +2. `FIRST(column)` - Used in all 18 examples (always with `p_event_time`) +3. `LAST(column)` - Used in all 18 examples (always with `p_event_time`) +4. `COUNT(pattern.*)` - Used in all 18 examples (one or more per query) + +**Actual Examples**: + +```sql +-- Example 1: Two pattern variables +MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, + COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances + +-- Example 2: One pattern variable +MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_AWS_Console_Sign_In.*) AS num_pattern_AWS_Console_Sign_In + +-- Example 3: Four pattern variables +MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_AWS_CloudTrail_SES_CheckSendQuota.*) AS num_pattern_AWS_CloudTrail_SES_CheckSendQuota, + COUNT(pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled.*) AS num_pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled, + COUNT(pattern_AWS_CloudTrail_SES_CheckIdentityVerifications.*) AS num_pattern_AWS_CloudTrail_SES_CheckIdentityVerifications, + COUNT(pattern_AWS_CloudTrail_SES_ListIdentities.*) AS num_pattern_AWS_CloudTrail_SES_ListIdentities +``` + +**Pattern**: +- Every example counts occurrences of each pattern variable defined in DEFINE clause +- Naming 
convention: `num_` + pattern variable name + +### 4. ALL ROWS PER MATCH + +**Usage**: 18 out of 18 examples + +**Observed Syntax**: +```sql +ALL ROWS PER MATCH +``` + +**Notes**: +- 100% of examples use this +- No examples use `ONE ROW PER MATCH` + +### 5. AFTER MATCH + +**Usage**: 18 out of 18 examples + +**Observed Syntax**: +```sql +AFTER MATCH SKIP PAST LAST ROW +``` + +**Notes**: +- 100% of examples use `SKIP PAST LAST ROW` +- No other skip strategies observed: + - Never uses `SKIP TO NEXT ROW` + - Never uses `SKIP TO FIRST variable` + - Never uses `SKIP TO LAST variable` + +### 6. PATTERN + +**Usage**: 18 out of 18 examples (required) + +**Observed Pattern Types**: + +#### Type 1: Sequential Pattern (11 examples) + +**Syntax**: +```sql +PATTERN (pattern_A{n,} pattern_B{n,}) +PATTERN (pattern_A{n,} pattern_B{n,} pattern_C{n,}) +``` + +**Examples from spreadsheet**: + +```sql +-- Two-step sequence +PATTERN (pattern_AWS_EC2_StopInstances{1,} pattern_AWS_EC2_Startup_Script_Change{1,}) +PATTERN (pattern_AWS_IAM_CreateUser{1,} pattern_AWS_IAM_AttachAdminUserPolicy{1,}) +PATTERN (pattern_AWS_IAM_CreateRole{1,} pattern_AWS_IAM_AttachAdminRolePolicy{1,}) +PATTERN (pattern_AWS_IAM_Backdoor_User_Keys{1,} pattern_AWS_CloudTrail_UserAccessKeyAuth{1,}) +PATTERN (pattern_AWS_CloudTrail_LoginProfileCreatedOrModified{1,} pattern_AWS_Console_Login{1,}) +PATTERN (pattern_GCP_Cloud_Run_Service_Created{1,} pattern_GCP_Cloud_Run_Set_IAM_Policy{1,}) +PATTERN (pattern_Notion_Login{1,} pattern_Notion_AccountChange{1,}) +PATTERN (pattern_OneLogin_HighRiskFailedLogin{1,} pattern_OneLogin_Login{1,}) +PATTERN (pattern_Okta_Login_Without_Push_Marker{1,} pattern_Push_Security_Phishing_Attack{1,}) +PATTERN (pattern_Wiz_Alert_Passthrough{1,} pattern_AWS_VPC_SSHAllowedSignal{1,}) +PATTERN (pattern_Crowdstrike_NewUserCreated{1,} pattern_Crowdstrike_UserDeleted{1,}) + +-- Three-step sequence +PATTERN (pattern_GCP_IAM_Tag_Enumeration{1,} pattern_GCP_Tag_Binding_Creation{1,} 
pattern_GCP_Privileged_Operation{1,}) +PATTERN (pattern_Snowflake_TempStageCreated{1,} pattern_Snowflake_CopyIntoStage{1,} pattern_Snowflake_FileDownloaded{1,}) + +-- Sequence with minimum count > 1 +PATTERN (pattern_Snowflake_Stream_BruteForceByIp{5,} pattern_Snowflake_Stream_LoginSuccess{1,}) +``` + +#### Type 2: Single Pattern (Negative Detection) (4 examples) + +**Syntax**: +```sql +PATTERN (pattern_A{1,}) +``` + +**Examples from spreadsheet**: + +```sql +PATTERN (pattern_AWS_Console_Sign_In{1,}) +PATTERN (pattern_Retrieve_SSO_access_token{1,}) +PATTERN (pattern_Okta_Login_Success{1,}) +PATTERN (pattern_GitHub_Secret_Scanning_Alert_Created{1,}) +``` + +**Note**: These use negative conditions in DEFINE with LEAD or LAG to detect absence of expected follow-up events. + +#### Type 3: PERMUTE (Unordered) Pattern (2 examples) + +**Syntax**: +```sql +PATTERN (PERMUTE(pattern_A{n,}, pattern_B{n,}, ...)) +``` + +**Examples from spreadsheet**: + +```sql +-- Absence detection: A present, B absent +PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) + +-- All four patterns in any order +PATTERN (PERMUTE( + pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled{1,}, + pattern_AWS_CloudTrail_SES_CheckSendQuota{1,}, + pattern_AWS_CloudTrail_SES_ListIdentities{1,}, + pattern_AWS_CloudTrail_SES_CheckIdentityVerifications{1,} +)) +``` + +**Quantifiers Observed**: + +| Quantifier | Meaning | Example Count | +|------------|---------|---------------| +| `{1,}` | One or more | 16 examples | +| `{5,}` | Five or more | 1 example | +| `{0,0}` | Zero (absence) | 1 example | + +**Quantifiers NOT Observed**: +- Never uses `{n}` (exactly n) +- Never uses `{n,m}` (between n and m) +- Never uses `+`, `*`, or `?` shorthand +- Never uses `|` (alternation) +- Never uses `()` grouping + +### 7. DEFINE + +**Usage**: 18 out of 18 examples (required) + +**Observed Pattern**: Every DEFINE clause defines one or more pattern variables with conditions. 
+ +**Condition Types Observed**: + +#### Type 1: Simple Equality (All Examples) + +Every pattern variable starts with a simple equality check: + +```sql +pattern_variable_name AS p_rule_id = 'Rule.Name' +``` + +#### Type 2: Simple Equality ONLY (3 examples) + +Some pattern variables have ONLY the equality condition: + +```sql +pattern_AWS_IAM_CreateUser AS p_rule_id = 'AWS.IAM.CreateUser' +pattern_GCP_IAM_Tag_Enumeration AS p_rule_id = 'GCP.IAM.Tag.Enumeration' +pattern_Snowflake_TempStageCreated AS p_rule_id = 'Snowflake.TempStageCreated' +``` + +#### Type 3: Equality + LAG Time Constraint (Most Common) + +**Syntax Pattern**: +```sql +pattern_variable AS p_rule_id = 'Rule.Name' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= N) +``` + +**Actual Examples**: + +```sql +-- 15 minutes +pattern_Notion_AccountChange AS p_rule_id = 'Notion.AccountChange' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15) + +-- 30 minutes +pattern_Snowflake_Stream_LoginSuccess AS p_rule_id = 'Snowflake.Stream.LoginSuccess' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) + +-- 60 minutes +pattern_AWS_IAM_AttachAdminUserPolicy AS p_rule_id = 'AWS.IAM.AttachAdminUserPolicy' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) + +-- 90 minutes +pattern_AWS_EC2_Startup_Script_Change AS p_rule_id = 'AWS.EC2.Startup.Script.Change' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90) + +-- 720 minutes (12 hours) +pattern_Crowdstrike_UserDeleted AS p_rule_id = 'Crowdstrike.UserDeleted' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 720) +``` + +**Time Windows Observed**: +- 15 minutes: 3 examples +- 30 minutes: 1 example +- 60 minutes: 6 examples +- 90 minutes: 2 
examples +- 120 minutes: 1 example +- 720 minutes: 1 example + +#### Type 4: Negative LAG Constraint (Absence Detection) + +**Syntax Pattern**: +```sql +pattern_variable AS p_rule_id = 'Current.Rule' + AND (LAG(p_rule_id, 1, '') != 'Previous.Rule' + OR (LAG(p_rule_id, 1, '') = 'Previous.Rule' + AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > N)) +``` + +**Actual Examples**: + +```sql +-- AWS Console sign-in WITHOUT Okta SSO within 15 minutes +pattern_AWS_Console_Sign_In AS p_rule_id = 'AWS.Console.Sign-In' + AND (LAG(p_rule_id, 1, '') != 'Okta.SSO.to.AWS' + OR (LAG(p_rule_id, 1, '') = 'Okta.SSO.to.AWS' + AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 15)) + +-- SSO token retrieval WITHOUT CLI prompt within 120 minutes +pattern_Retrieve_SSO_access_token AS p_rule_id = 'Retrieve.SSO.access.token' + AND (LAG(p_rule_id, 1, '') != 'Sign-in.with.AWS.CLI.prompt' + OR (LAG(p_rule_id, 1, '') = 'Sign-in.with.AWS.CLI.prompt' + AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 120)) +``` + +**Pattern**: Detects event A when event B did NOT occur immediately before, or occurred too long ago. 
+ +#### Type 5: Negative LEAD Constraint (Absence Detection) + +**Syntax Pattern**: +```sql +pattern_variable AS p_rule_id = 'Current.Rule' + AND (LEAD(p_rule_id, 1, '') != 'Next.Rule' + OR (LEAD(p_rule_id, 1, '') = 'Next.Rule' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > N)) +``` + +**Actual Examples**: + +```sql +-- Okta login WITHOUT Push Security within 60 minutes +pattern_Okta_Login_Success AS p_rule_id = 'Okta.Login.Success' + AND (LEAD(p_rule_id, 1, '') != 'Push.Security.Authorized.IdP.Login' + OR (LEAD(p_rule_id, 1, '') = 'Push.Security.Authorized.IdP.Login' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) + +-- GitHub secret exposed WITHOUT quarantine within 60 minutes +pattern_GitHub_Secret_Scanning_Alert_Created AS p_rule_id = 'GitHub.Secret.Scanning.Alert.Created' + AND (LEAD(p_rule_id, 1, '') != 'AWS.CloudTrail.IAMCompromisedKeyQuarantine' + OR (LEAD(p_rule_id, 1, '') = 'AWS.CloudTrail.IAMCompromisedKeyQuarantine' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) +``` + +**Pattern**: Detects event A when event B does NOT occur immediately after, or occurs too far in the future. 
+ +**Functions Used in DEFINE**: + +| Function | Usage Count | Purpose | +|----------|-------------|---------| +| `LAG(column, offset, default)` | 14 examples | Access previous row value | +| `LEAD(column, offset, default)` | 2 examples | Access next row value | +| `DATEDIFF(MINS, start, end)` | 16 examples | Calculate minute difference | +| `ABS(value)` | 16 examples | Absolute value for time gaps | + +**Functions NEVER Used**: +- `PREV()` - Never used (always use LAG instead) +- `FIRST()` - Never used in DEFINE (only in MEASURES) +- `LAST()` - Never used in DEFINE (only in MEASURES) + +## Complete Pattern Templates from Examples + +### Template 1: Sequential Two-Step Pattern (Most Common) + +```sql +FROM filter_data +MATCH_RECOGNIZE ( + PARTITION BY match_column + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_StepA.*) AS num_pattern_StepA, + COUNT(pattern_StepB.*) AS num_pattern_StepB + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_StepA{1,} pattern_StepB{1,}) + DEFINE + pattern_StepA AS p_rule_id = 'Rule.A', + pattern_StepB AS p_rule_id = 'Rule.B' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) +) +``` + +### Template 2: Sequential Three-Step Pattern + +```sql +FROM filter_data +MATCH_RECOGNIZE ( + PARTITION BY match_column + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_StepA.*) AS num_pattern_StepA, + COUNT(pattern_StepB.*) AS num_pattern_StepB, + COUNT(pattern_StepC.*) AS num_pattern_StepC + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_StepA{1,} pattern_StepB{1,} pattern_StepC{1,}) + DEFINE + pattern_StepA AS p_rule_id = 'Rule.A', + pattern_StepB AS p_rule_id = 'Rule.B' + AND (LAG(p_event_time, 1, NULL) is NULL + OR 
ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15), + pattern_StepC AS p_rule_id = 'Rule.C' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15) +) +``` + +### Template 3: Absence Detection (Event Without Follow-up) + +```sql +FROM filter_data +MATCH_RECOGNIZE ( + PARTITION BY match_column + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_EventA.*) AS num_pattern_EventA + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_EventA{1,}) + DEFINE + pattern_EventA AS p_rule_id = 'Rule.A' + AND (LEAD(p_rule_id, 1, '') != 'Rule.B' + OR (LEAD(p_rule_id, 1, '') = 'Rule.B' + AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) +) +``` + +### Template 4: PERMUTE Pattern (Any Order) + +```sql +FROM filter_data +MATCH_RECOGNIZE ( + PARTITION BY match_column + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_EventA.*) AS num_pattern_EventA, + COUNT(pattern_EventB.*) AS num_pattern_EventB, + COUNT(pattern_EventC.*) AS num_pattern_EventC + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (PERMUTE(pattern_EventA{1,}, pattern_EventB{1,}, pattern_EventC{1,})) + DEFINE + pattern_EventA AS p_rule_id = 'Rule.A', + pattern_EventB AS p_rule_id = 'Rule.B', + pattern_EventC AS p_rule_id = 'Rule.C' +) +``` + +### Template 5: Threshold Pattern (N occurrences then success) + +```sql +FROM filter_data +MATCH_RECOGNIZE ( + PARTITION BY match_column + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_Failure.*) AS num_pattern_Failure, + COUNT(pattern_Success.*) AS num_pattern_Success + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_Failure{5,} 
pattern_Success{1,}) + DEFINE + pattern_Failure AS p_rule_id = 'Rule.Failure', + pattern_Success AS p_rule_id = 'Rule.Success' + AND (LAG(p_event_time, 1, NULL) is NULL + OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) +) +``` + +## Summary Statistics + +### By Clause Usage + +| Clause | Usage | Notes | +|--------|-------|-------| +| ORDER BY | 18/18 (100%) | Always `p_event_time ASC` | +| MEASURES | 18/18 (100%) | Always includes MATCH_NUMBER(), FIRST(), LAST(), COUNT() | +| ALL ROWS PER MATCH | 18/18 (100%) | No examples use ONE ROW PER MATCH | +| AFTER MATCH | 18/18 (100%) | Always `SKIP PAST LAST ROW` | +| PATTERN | 18/18 (100%) | Required clause | +| DEFINE | 18/18 (100%) | Required clause | +| PARTITION BY | 17/18 (94%) | One example omits it | +| SUBSET | 0/18 (0%) | Never used | + +### By Pattern Type + +| Pattern Type | Count | Percentage | +|--------------|-------|------------| +| Sequential (2 steps) | 10 | 56% | +| Sequential (3 steps) | 2 | 11% | +| Single pattern (absence detection) | 4 | 22% | +| PERMUTE (unordered) | 2 | 11% | + +### By DEFINE Condition Type + +| Condition Type | Approx. Count | Percentage | +|----------------|---------------|------------| +| Simple equality only | ~5 | ~28% | +| Equality + LAG time constraint | ~11 | ~61% | +| Equality + negative LAG | ~2 | ~11% | +| Equality + negative LEAD | ~2 | ~11% | + +Note: Some examples have multiple pattern variables with different condition types, so percentages don't sum to 100%. 
+ +## Naming Conventions Observed + +### Pattern Variables +All pattern variables follow this naming convention: +``` +pattern_ +``` + +Examples: +- `pattern_AWS_IAM_CreateUser` +- `pattern_GCP_Cloud_Run_Service_Created` +- `pattern_Okta_Login_Success` +- `pattern_Snowflake_TempStageCreated` + +### Measure Aliases +All measure aliases follow consistent naming: +- Match identifier: `match_number` +- Start time: `start_time` +- End time: `end_time` +- Count pattern: `num_` (without "pattern_" prefix) + +### Column Names +The examples use consistent column naming: +- Event time: `p_event_time` +- Rule ID: `p_rule_id` +- Match key: `match_0_0`, `accountRegion`, `field_name`, `empty_match` + +## Complete Example Breakdown + +### Example 1: aws_cloudtrail_stopinstance_followed_by_modifyinstanceattributes.yml + +```sql +MATCH_RECOGNIZE ( + PARTITION BY match_0_0 + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, + COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_AWS_EC2_StopInstances{1,} pattern_AWS_EC2_Startup_Script_Change{1,}) + DEFINE + pattern_AWS_EC2_Startup_Script_Change AS p_rule_id = 'AWS.EC2.Startup.Script.Change' + AND (LAG(p_event_time, 1, NULL) is NULL OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90), + pattern_AWS_EC2_StopInstances AS p_rule_id = 'AWS.EC2.StopInstances' +) +``` + +**Pattern Type**: Sequential two-step +**Time Window**: 90 minutes +**Purpose**: Detect EC2 instance stop followed by startup script modification + +### Example 2: github_advanced_security_change_not_followed_by_repo_archived.yml + +```sql +MATCH_RECOGNIZE ( + PARTITION BY field_name + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS 
start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_GitHub_Advanced_Security_Change.*) AS num_pattern_GitHub_Advanced_Security_Change, + COUNT(pattern_Github_Repo_Archived.*) AS num_pattern_Github_Repo_Archived + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) + DEFINE + pattern_GitHub_Advanced_Security_Change AS p_rule_id = 'GitHub.Advanced.Security.Change', + pattern_Github_Repo_Archived AS p_rule_id = 'Github.Repo.Archived' +) +``` + +**Pattern Type**: PERMUTE with absence detection (`{0,0}`) +**Time Window**: None +**Purpose**: Detect security change WITHOUT repo being archived +**Special**: Uses `HAVING num_pattern_Github_Repo_Archived = 0` after MATCH_RECOGNIZE + +### Example 3: snowflake_potential_brute_force_success.yml + +```sql +MATCH_RECOGNIZE ( + PARTITION BY match_0_0 + ORDER BY p_event_time ASC + MEASURES + MATCH_NUMBER() AS match_number, + FIRST(p_event_time) AS start_time, + LAST(p_event_time) AS end_time, + COUNT(pattern_Snowflake_Stream_BruteForceByIp.*) AS num_pattern_Snowflake_Stream_BruteForceByIp, + COUNT(pattern_Snowflake_Stream_LoginSuccess.*) AS num_pattern_Snowflake_Stream_LoginSuccess + ALL ROWS PER MATCH + AFTER MATCH SKIP PAST LAST ROW + PATTERN (pattern_Snowflake_Stream_BruteForceByIp{5,} pattern_Snowflake_Stream_LoginSuccess{1,}) + DEFINE + pattern_Snowflake_Stream_BruteForceByIp AS p_rule_id = 'Snowflake.Stream.BruteForceByIp', + pattern_Snowflake_Stream_LoginSuccess AS p_rule_id = 'Snowflake.Stream.LoginSuccess' + AND (LAG(p_event_time, 1, NULL) is NULL OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) +) +``` + +**Pattern Type**: Threshold-based (minimum 5 failures) +**Time Window**: 30 minutes +**Purpose**: Detect at least 5 brute force attempts followed by successful login + +## Functions and Operators Summary + +### Functions Used in MEASURES +- `MATCH_NUMBER()` - 18/18 examples +- `FIRST(column)` - 
18/18 examples +- `LAST(column)` - 18/18 examples +- `COUNT(pattern.*)` - 18/18 examples + +### Functions Used in DEFINE +- `LAG(column, offset, default)` - 14/18 examples +- `LEAD(column, offset, default)` - 2/18 examples +- `DATEDIFF(MINS, start, end)` - 16/18 examples +- `ABS(value)` - 16/18 examples + +### Operators Used +- `=` (equality) - All examples +- `!=` (inequality) - 4 examples (negative detection) +- `AND` - All examples with complex conditions +- `OR` - All examples with LAG/LEAD time checks +- `<=` (less than or equal) - Positive time constraints +- `>` (greater than) - Negative time constraints + +### Operators NOT Observed +- `<` (less than) +- `>=` (greater than or equal) +- `BETWEEN` +- `IN` +- `LIKE` +- Arithmetic operators (+, -, *, /) + +## Snowflake-Specific Syntax + +The examples appear to use Snowflake SQL dialect: + +1. `DATEDIFF(MINS, start_time, end_time)` - Snowflake syntax for date difference +2. `LATERAL FLATTEN` - Snowflake JSON array processing (in filter CTE, before MATCH_RECOGNIZE) +3. `p_occurs_since('N minutes')` - Custom function for time filtering (in filter CTE) +4. 
`GET_PATH()` - Snowflake JSON path extraction (in filter CTE) + +## Conclusion + +Based on the 18 examples in the spreadsheet, the MATCH_RECOGNIZE syntax used follows a very consistent pattern: + +**Always Present**: +- `PARTITION BY` (17/18 examples, always single column) +- `ORDER BY p_event_time ASC` (always) +- `MEASURES` with `MATCH_NUMBER()`, `FIRST()`, `LAST()`, `COUNT()` (always) +- `ALL ROWS PER MATCH` (always, never ONE ROW PER MATCH) +- `AFTER MATCH SKIP PAST LAST ROW` (always, no other strategies) +- `PATTERN` with `{1,}` or `{5,}` quantifiers (always) +- `DEFINE` with equality conditions, often with LAG/LEAD time constraints (always) + +**Never Present**: +- `SUBSET` clause +- `ONE ROW PER MATCH` +- Other `AFTER MATCH` strategies +- Quantifiers: `{n}`, `{n,m}` (except `{0,0}`), `+`, `*`, `?` +- Pattern operators: `|`, grouping with `()` +- `PREV()` function + +**Rarely Present**: +- `PERMUTE()` (2/18 examples) +- `LEAD()` (2/18 examples, mostly use LAG) +- Quantifier `{5,}` (1 example, threshold detection) +- Quantifier `{0,0}` (1 example, absence detection) + + + + diff --git a/identifier-clause-comparison-v2.csv b/identifier-clause-comparison-v2.csv new file mode 100644 index 000000000000..89c33a385ad9 --- /dev/null +++ b/identifier-clause-comparison-v2.csv @@ -0,0 +1,274 @@ +Query#,SQL Text,Master,identifier-clause,identifier-clause-legacy +1,SET hivevar:colname = 'c',SUCCESS,SUCCESS,SUCCESS +2,SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1),SUCCESS,SUCCESS,SUCCESS +3,SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +4,SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +5,SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +6,SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`),SUCCESS,SUCCESS,SUCCESS +7,SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``),SUCCESS,SUCCESS,SUCCESS +8,SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +9,CREATE 
SCHEMA IF NOT EXISTS s,SUCCESS,SUCCESS,SUCCESS +10,CREATE TABLE s.tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +11,USE SCHEMA s,SUCCESS,SUCCESS,SUCCESS +12,INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1),SUCCESS,SUCCESS,SUCCESS +13,DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION +14,UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 +15,"MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 + WHEN MATCHED THEN UPDATE SET c1 = 3",_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 +16,SELECT * FROM IDENTIFIER('tab'),SUCCESS,SUCCESS,SUCCESS +17,SELECT * FROM IDENTIFIER('s.tab'),SUCCESS,SUCCESS,SUCCESS +18,SELECT * FROM IDENTIFIER('`s`.`tab`'),SUCCESS,SUCCESS,SUCCESS +19,SELECT * FROM IDENTIFIER('t' || 'a' || 'b'),SUCCESS,SUCCESS,SUCCESS +20,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS +21,DROP TABLE s.tab,SUCCESS,SUCCESS,SUCCESS +22,DROP SCHEMA s,SUCCESS,SUCCESS,SUCCESS +23,"SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)",SUCCESS,SUCCESS,SUCCESS +24,SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +25,"SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1)",SUCCESS,SUCCESS,SUCCESS +26,CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +27,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +28,CREATE SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS +29,USE identifier_clauses,SUCCESS,SUCCESS,SUCCESS +30,CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +31,DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab'),SUCCESS,SUCCESS,SUCCESS +32,CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +33,REPLACE TABLE IDENTIFIER('identifier_clauses.' 
|| 'tab')(c1 INT) USING CSV,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION +34,CACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +35,UNCACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +36,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +37,USE default,SUCCESS,SUCCESS,SUCCESS +38,DROP SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS +39,CREATE TABLE tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +40,INSERT INTO tab VALUES (1),SUCCESS,SUCCESS,SUCCESS +41,SELECT c1 FROM tab,SUCCESS,SUCCESS,SUCCESS +42,DESCRIBE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +43,ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS,SUCCESS,SUCCESS,SUCCESS +44,ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT,SUCCESS,SUCCESS,SUCCESS +45,SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +46,SHOW COLUMNS FROM IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +47,COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello',SUCCESS,SUCCESS,SUCCESS +48,REFRESH TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +49,REPAIR TABLE IDENTIFIER('ta' || 'b'),NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE +50,TRUNCATE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +51,DROP TABLE IF EXISTS tab,SUCCESS,SUCCESS,SUCCESS +52,CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS +53,SELECT * FROM v,SUCCESS,SUCCESS,SUCCESS +54,ALTER VIEW IDENTIFIER('v') AS VALUES(2),SUCCESS,SUCCESS,SUCCESS +55,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS +56,CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS +57,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS +58,CREATE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +59,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue'),SUCCESS,SUCCESS,SUCCESS +60,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 
'someloc',SUCCESS,SUCCESS,SUCCESS +61,COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment',SUCCESS,SUCCESS,SUCCESS +62,DESCRIBE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +63,SHOW TABLES IN IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +64,SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello',SUCCESS,SUCCESS,SUCCESS +65,USE IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +66,SHOW CURRENT SCHEMA,SUCCESS,SUCCESS,SUCCESS +67,USE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +68,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS +69,DROP SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +70,CREATE SCHEMA ident,SUCCESS,SUCCESS,SUCCESS +71,CREATE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS +72,DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +73,REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +74,DROP FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +75,DROP SCHEMA ident,SUCCESS,SUCCESS,SUCCESS +76,CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS +77,DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg'),SUCCESS,SUCCESS,SUCCESS +78,DECLARE var = 'sometable',SUCCESS,SUCCESS,SUCCESS +79,CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +80,SET VAR var = 'c1',SUCCESS,SUCCESS,SUCCESS +81,SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +82,SET VAR var = 'some',SUCCESS,SUCCESS,SUCCESS +83,DROP TABLE IDENTIFIER(var || 'table'),SUCCESS,SUCCESS,SUCCESS +84,SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`),PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +85,SELECT IDENTIFIER('') FROM VALUES(1) AS T(``),PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT +86,VALUES(IDENTIFIER(CAST(NULL AS STRING))),NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL +87,VALUES(IDENTIFIER(1)),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +88,"VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1)))",NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT +89,SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1),UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE +90,CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +91,CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +92,CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +93,DROP TABLE IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE 
+94,DROP VIEW IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +95,COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello',REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +96,VALUES(IDENTIFIER(1)()),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +97,VALUES(IDENTIFIER('a.b.c.d')()),IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS +98,CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE +99,DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg'),INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME +100,CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1),TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS +101,"create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +102,"cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +103,"create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +104,insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1,SUCCESS,SUCCESS,SUCCESS +105,drop view v1,SUCCESS,SUCCESS,SUCCESS +106,drop table t1,SUCCESS,SUCCESS,SUCCESS +107,drop table t2,SUCCESS,SUCCESS,SUCCESS +108,DECLARE agg = 'max',SUCCESS,SUCCESS,SUCCESS +109,DECLARE col = 'c1',SUCCESS,SUCCESS,SUCCESS +110,DECLARE tab = 'T',SUCCESS,SUCCESS,SUCCESS +111,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS 
(VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab)",SUCCESS,SUCCESS,SUCCESS +112,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T')",SUCCESS,SUCCESS,SUCCESS +113,"WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC')",SUCCESS,SUCCESS,SUCCESS +114,SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +115,SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +116,SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1),UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION +117,"SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +118,"SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +119,SELECT * FROM s.IDENTIFIER('tab'),INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +120,SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab'),PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +121,SELECT * FROM IDENTIFIER('s').tab,PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +122,SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +123,SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +124,SELECT 1 AS IDENTIFIER('col1'),NEW,SUCCESS,PARSE_SYNTAX_ERROR +125,"SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR 
+126,WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +127,CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +128,SELECT c1 FROM v,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +129,CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV,PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +130,INSERT INTO tab(IDENTIFIER('c1')) VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +131,SELECT c1 FROM tab,SUCCESS,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +132,ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +133,SELECT col1 FROM tab,NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,TABLE_OR_VIEW_NOT_FOUND +134,ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT,NEW,SUCCESS,PARSE_SYNTAX_ERROR +135,SELECT c2 FROM tab,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +136,ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +137,ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed'),NEW,SUCCESS,PARSE_SYNTAX_ERROR +138,SELECT * FROM tab_renamed,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +139,CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV,NEW,SUCCESS,PARSE_SYNTAX_ERROR +140,DROP TABLE IF EXISTS test_col_with_dot,NEW,SUCCESS,SUCCESS +141,"SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +142,SELECT 1 AS IDENTIFIER('col1.col2'),NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +143,"CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +144,SHOW VIEWS IN IDENTIFIER('default'),NEW,SUCCESS,SUCCESS +145,SHOW PARTITIONS IDENTIFIER('test_show'),NEW,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY +146,SHOW CREATE TABLE IDENTIFIER('test_show'),NEW,SUCCESS,SUCCESS 
+147,DROP TABLE test_show,NEW,SUCCESS,SUCCESS +148,CREATE TABLE test_desc(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS +149,DESCRIBE TABLE IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +150,DESCRIBE FORMATTED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +151,DESCRIBE EXTENDED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +152,DESC IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +153,DROP TABLE test_desc,NEW,SUCCESS,SUCCESS +154,"CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +155,COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment',NEW,SUCCESS,SUCCESS +156,ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment',NEW,SUCCESS,PARSE_SYNTAX_ERROR +157,DROP TABLE test_comment,NEW,SUCCESS,SUCCESS +158,CREATE SCHEMA test_schema,NEW,SUCCESS,SUCCESS +159,CREATE TABLE test_schema.test_table(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS +160,ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS,NEW,SUCCESS,SUCCESS +161,REFRESH TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +162,DESCRIBE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +163,SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +164,DROP TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +165,DROP SCHEMA test_schema,NEW,SUCCESS,SUCCESS +166,DECLARE IDENTIFIER('my_var') = 'value',NEW,SUCCESS,SUCCESS +167,SET VAR IDENTIFIER('my_var') = 'new_value',NEW,SUCCESS,PARSE_SYNTAX_ERROR +168,SELECT IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS +169,DROP TEMPORARY VARIABLE IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS +170,"CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR +171,"SELECT test_udf(5, 'hello')",NEW,SUCCESS,UNRESOLVED_ROUTINE +172,DROP TEMPORARY FUNCTION test_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND +173,"CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) 
+RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result'",NEW,SUCCESS,PARSE_SYNTAX_ERROR +174,SELECT * FROM test_table_udf(42),NEW,SUCCESS,UNRESOLVABLE_TABLE_VALUED_FUNCTION +175,DROP TEMPORARY FUNCTION test_table_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND +176,"BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +177,LEAVE IDENTIFIER('loop_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +178,END LOOP loop_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +179,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +180,"BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +181,SELECT x,NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +182,END IDENTIFIER('block_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +183,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +184,"BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +185,"IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +186,END WHILE while_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +187,SELECT IDENTIFIER('counter'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +188,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +189,"BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +190,"repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +191,"UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label')",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +192,SELECT IDENTIFIER('cnt'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +193,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +194,"BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT 
row.c1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +195,END FOR IDENTIFIER('for_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +196,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +197,EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab,NEW,INVALID_EXTRACT_BASE_FIELD_TYPE,INVALID_EXTRACT_BASE_FIELD_TYPE +198,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1",NEW,SUCCESS,SUCCESS +199,"CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +200,"INSERT INTO integration_test VALUES (1, 'a'), (2, 'b')",NEW,SUCCESS,SUCCESS +201,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table",NEW,SUCCESS,SUCCESS +202,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix",NEW,SUCCESS,SUCCESS +203,"EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val",NEW,SUCCESS,SUCCESS +204,"CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV",NEW,SUCCESS,SUCCESS +205,"INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y')",NEW,SUCCESS,SUCCESS +206,"EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col",NEW,SUCCESS,PARSE_SYNTAX_ERROR +207,"EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord",NEW,SUCCESS,SUCCESS +208,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col",NEW,SUCCESS,SUCCESS +209,"EXECUTE 
IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2",NEW,SUCCESS,SUCCESS +210,"EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2",NEW,SUCCESS,PARSE_SYNTAX_ERROR +211,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +212,"EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR +213,"EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR +214,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view",NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +215,DROP VIEW test_view,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +216,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col",NEW,SUCCESS,PARSE_SYNTAX_ERROR +217,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col",NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +218,"EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val",NEW,SUCCESS,PARSE_SYNTAX_ERROR +219,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias",NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,PARSE_SYNTAX_ERROR 
+220,"EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab",NEW,SUCCESS,SUCCESS +221,"EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +222,SELECT IDENTIFIER(:var_name) AS result,NEW,UNBOUND_SQL_PARAMETER,UNBOUND_SQL_PARAMETER +223,"END' + USING 'my_variable' AS var_name, 100 AS var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +224,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias",NEW,SUCCESS,SUCCESS +225,"EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +226,DROP TABLE integration_test,NEW,SUCCESS,SUCCESS +227,DROP TABLE integration_test2,NEW,SUCCESS,SUCCESS diff --git a/identifier-clause-comparison-v2.md b/identifier-clause-comparison-v2.md new file mode 100644 index 000000000000..8e8fe11c915c --- /dev/null +++ b/identifier-clause-comparison-v2.md @@ -0,0 +1,452 @@ +# IDENTIFIER Clause Test Comparison (v2) + +Comprehensive comparison of IDENTIFIER clause behavior across different modes. 
+
+- **Total Tests**: 227
+- **Tests from Master**: 128
+- **New Tests Added**: 99
+- **Tests Changed from Master**: 13
+- **Tests with Legacy Mode Differences**: 47
+
+## Legend
+
+- **Query#**: Sequential test number
+- **SQL Text**: The SQL query being tested
+- **Master**: Result from master branch (before identifier-lite changes)
+- **identifier-clause**: Result with identifier-lite enabled (default mode, current)
+- **identifier-clause-legacy**: Result with `spark.sql.legacy.identifierClause=true`
+
+### Result Values
+
+- `SUCCESS`: Query executed successfully
+- `<ERROR_CLASS>`: Query failed with the specified error class
+- `NEW`: Test did not exist in master baseline
+
+---
+
+## Full Test Results
+
+| Query# | SQL Text | Master | identifier-clause | identifier-clause-legacy |
+|--------|----------|--------|-------------------|--------------------------|
+| 1 | SET hivevar:colname = 'c' | SUCCESS | SUCCESS | SUCCESS |
+| 2 | SELECT IDENTIFIER(${colname} \|\| '_1') FROM VALUES(1) AS T(c_1) | SUCCESS | SUCCESS | SUCCESS |
+| 3 | SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 4 | SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 5 | SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 6 | SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`) | SUCCESS | SUCCESS | SUCCESS |
+| 7 | SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) | SUCCESS | SUCCESS | SUCCESS |
+| 8 | SELECT IDENTIFIER('c' \|\| '1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 9 | CREATE SCHEMA IF NOT EXISTS s | SUCCESS | SUCCESS | SUCCESS |
+| 10 | CREATE TABLE s.tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS |
+| 11 | USE SCHEMA s | SUCCESS | SUCCESS | SUCCESS |
+| 12 | INSERT INTO IDENTIFIER('ta' \|\| 'b') VALUES(1) | SUCCESS | SUCCESS | SUCCESS |
+| 13 | DELETE FROM IDENTIFIER('ta' \|\| 'b') WHERE 1=0 | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | 
UNSUPPORTED_FEATURE.TABLE_OPERATION | +| 14 | UPDATE IDENTIFIER('ta' \|\| 'b') SET c1 = 2 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | +| 15 | MERGE INTO IDENTIFIER('ta' \|\| 'b') AS t USING IDENTIFIER('ta' \|\| 'b') AS s ON s.c1 = t.c1 WHEN MATCHED THEN UPD... | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | +| 16 | SELECT * FROM IDENTIFIER('tab') | SUCCESS | SUCCESS | SUCCESS | +| 17 | SELECT * FROM IDENTIFIER('s.tab') | SUCCESS | SUCCESS | SUCCESS | +| 18 | SELECT * FROM IDENTIFIER('`s`.`tab`') | SUCCESS | SUCCESS | SUCCESS | +| 19 | SELECT * FROM IDENTIFIER('t' \|\| 'a' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 20 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | +| 21 | DROP TABLE s.tab | SUCCESS | SUCCESS | SUCCESS | +| 22 | DROP SCHEMA s | SUCCESS | SUCCESS | SUCCESS | +| 23 | SELECT IDENTIFIER('COAL' \|\| 'ESCE')(NULL, 1) | SUCCESS | SUCCESS | SUCCESS | +| 24 | SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | +| 25 | SELECT * FROM IDENTIFIER('ra' \|\| 'nge')(0, 1) | SUCCESS | SUCCESS | SUCCESS | +| 26 | CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 27 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 28 | CREATE SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 29 | USE identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 30 | CREATE TABLE IDENTIFIER('ta' \|\| 'b')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 31 | DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' \|\| 'tab') | SUCCESS | SUCCESS | SUCCESS | +| 32 | CREATE TABLE IDENTIFIER('identifier_clauses.' \|\| 'tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 33 | REPLACE TABLE IDENTIFIER('identifier_clauses.' 
\|\| 'tab')(c1 INT) USING CSV | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | +| 34 | CACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 35 | UNCACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 36 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 37 | USE default | SUCCESS | SUCCESS | SUCCESS | +| 38 | DROP SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 39 | CREATE TABLE tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 40 | INSERT INTO tab VALUES (1) | SUCCESS | SUCCESS | SUCCESS | +| 41 | SELECT c1 FROM tab | SUCCESS | SUCCESS | SUCCESS | +| 42 | DESCRIBE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 43 | ANALYZE TABLE IDENTIFIER('ta' \|\| 'b') COMPUTE STATISTICS | SUCCESS | SUCCESS | SUCCESS | +| 44 | ALTER TABLE IDENTIFIER('ta' \|\| 'b') ADD COLUMN c2 INT | SUCCESS | SUCCESS | SUCCESS | +| 45 | SHOW TBLPROPERTIES IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 46 | SHOW COLUMNS FROM IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 47 | COMMENT ON TABLE IDENTIFIER('ta' \|\| 'b') IS 'hello' | SUCCESS | SUCCESS | SUCCESS | +| 48 | REFRESH TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 49 | REPAIR TABLE IDENTIFIER('ta' \|\| 'b') | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | +| 50 | TRUNCATE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 51 | DROP TABLE IF EXISTS tab | SUCCESS | SUCCESS | SUCCESS | +| 52 | CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | +| 53 | SELECT * FROM v | SUCCESS | SUCCESS | SUCCESS | +| 54 | ALTER VIEW IDENTIFIER('v') AS VALUES(2) | SUCCESS | SUCCESS | SUCCESS | +| 55 | DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | +| 56 | CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | +| 57 
| DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | +| 58 | CREATE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 59 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET PROPERTIES (somekey = 'somevalue') | SUCCESS | SUCCESS | SUCCESS | +| 60 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET LOCATION 'someloc' | SUCCESS | SUCCESS | SUCCESS | +| 61 | COMMENT ON SCHEMA IDENTIFIER('id' \|\| 'ent') IS 'some comment' | SUCCESS | SUCCESS | SUCCESS | +| 62 | DESCRIBE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 63 | SHOW TABLES IN IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 64 | SHOW TABLE EXTENDED IN IDENTIFIER('id' \|\| 'ent') LIKE 'hello' | SUCCESS | SUCCESS | SUCCESS | +| 65 | USE IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 66 | SHOW CURRENT SCHEMA | SUCCESS | SUCCESS | SUCCESS | +| 67 | USE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 68 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | +| 69 | DROP SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 70 | CREATE SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | +| 71 | CREATE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | +| 72 | DESCRIBE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 73 | REFRESH FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 74 | DROP FUNCTION IDENTIFIER('ident.' 
\|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 75 | DROP SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | +| 76 | CREATE TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | +| 77 | DROP TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 78 | DECLARE var = 'sometable' | SUCCESS | SUCCESS | SUCCESS | +| 79 | CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 80 | SET VAR var = 'c1' | SUCCESS | SUCCESS | SUCCESS | +| 81 | SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | +| 82 | SET VAR var = 'some' | SUCCESS | SUCCESS | SUCCESS | +| 83 | DROP TABLE IDENTIFIER(var \|\| 'table') | SUCCESS | SUCCESS | SUCCESS | +| 84 | SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 85 | SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | +| 86 | VALUES(IDENTIFIER(CAST(NULL AS STRING))) | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | +| 87 | VALUES(IDENTIFIER(1)) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 88 | VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | +| 89 | SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | +| 90 | CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 91 | CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 92 | CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) | 
REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 93 | DROP TABLE IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 94 | DROP VIEW IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 95 | COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 96 | VALUES(IDENTIFIER(1)()) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 97 | VALUES(IDENTIFIER('a.b.c.d')()) | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | +| 98 | CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | +| 99 | DROP TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | +| 100 | CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | +| 101 | create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 102 | cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 103 | create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 104 | insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 
| SUCCESS | SUCCESS | SUCCESS | +| 105 | drop view v1 | SUCCESS | SUCCESS | SUCCESS | +| 106 | drop table t1 | SUCCESS | SUCCESS | SUCCESS | +| 107 | drop table t2 | SUCCESS | SUCCESS | SUCCESS | +| 108 | DECLARE agg = 'max' | SUCCESS | SUCCESS | SUCCESS | +| 109 | DECLARE col = 'c1' | SUCCESS | SUCCESS | SUCCESS | +| 110 | DECLARE tab = 'T' | SUCCESS | SUCCESS | SUCCESS | +| 111 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER(agg)(ID... | SUCCESS | SUCCESS | SUCCESS | +| 112 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER('max')(... | SUCCESS | SUCCESS | SUCCESS | +| 113 | WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' \|\| 'BC') | SUCCESS | SUCCESS | SUCCESS | +| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 116 | SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | +| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 121 | SELECT * FROM IDENTIFIER('s').tab | 
PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 124 | SELECT 1 AS IDENTIFIER('col1') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 128 | SELECT c1 FROM v | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 131 | SELECT c1 FROM tab | SUCCESS | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 133 | SELECT col1 FROM tab | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | +| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 135 | SELECT c2 FROM tab | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 137 | 
ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 138 | SELECT * FROM tab_renamed | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 140 | DROP TABLE IF EXISTS test_col_with_dot | **NEW** | SUCCESS | SUCCESS | +| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 142 | SELECT 1 AS IDENTIFIER('col1.col2') | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 144 | SHOW VIEWS IN IDENTIFIER('default') | **NEW** | SUCCESS | SUCCESS | +| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | **NEW** | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | +| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | **NEW** | SUCCESS | SUCCESS | +| 147 | DROP TABLE test_show | **NEW** | SUCCESS | SUCCESS | +| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 152 | DESC IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 153 | DROP TABLE test_desc | **NEW** | SUCCESS | SUCCESS | +| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | **NEW** | SUCCESS | SUCCESS | +| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 157 | DROP TABLE 
test_comment | **NEW** | SUCCESS | SUCCESS | +| 158 | CREATE SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | +| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | **NEW** | SUCCESS | SUCCESS | +| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 165 | DROP SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | +| 166 | DECLARE IDENTIFIER('my_var') = 'value' | **NEW** | SUCCESS | SUCCESS | +| 167 | SET VAR IDENTIFIER('my_var') = 'new_value' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 168 | SELECT IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | +| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | +| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS INT RETURN IDENTIFI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 171 | SELECT test_udf(5, 'hello') | **NEW** | **SUCCESS** | **UNRESOLVED_ROUTINE** | +| 172 | DROP TEMPORARY FUNCTION test_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIE... 
| **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 174 | SELECT * FROM test_table_udf(42) | **NEW** | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | +| 175 | DROP TEMPORARY FUNCTION test_table_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 177 | LEAVE IDENTIFIER('loop_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 178 | END LOOP loop_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 179 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 181 | SELECT x | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 182 | END IDENTIFIER('block_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 183 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('counter') + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 186 | END WHILE while_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 187 | SELECT IDENTIFIER('counter') | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 188 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 192 | SELECT IDENTIFIER('cnt') | **NEW** | 
UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 193 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 195 | END FOR IDENTIFIER('for_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 196 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | **NEW** | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | +| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' USING 'c1' AS col1 | **NEW** | SUCCESS | SUCCESS | +| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | **NEW** | SUCCESS | SUCCESS | +| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, 'integration_test' AS... | **NEW** | SUCCESS | SUCCESS | +| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' USING 'c' AS ... | **NEW** | SUCCESS | SUCCESS | +| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS col, 1 AS val | **NEW** | SUCCESS | SUCCESS | +| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | **NEW** | SUCCESS | SUCCESS | +| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' USIN... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) O... 
| **NEW** | SUCCESS | SUCCESS | +| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP B... | **NEW** | SUCCESS | SUCCESS | +| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' USING 'c1' AS... | **NEW** | SUCCESS | SUCCESS | +| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' USING ... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) ... | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' USING 'my_ct... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' USI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'test_view' AS view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 215 | DROP VIEW test_view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'integration_test' AS ta... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' USING '... 
| **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 AS val | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 219 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' USING 't' AS alias | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **PARSE_SYNTAX_ERROR** | +| 220 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIF... | **NEW** | SUCCESS | SUCCESS | +| 221 | EXECUTE IMMEDIATE 'BEGIN DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 222 | SELECT IDENTIFIER(:var_name) AS result | **NEW** | UNBOUND_SQL_PARAMETER | UNBOUND_SQL_PARAMETER | +| 223 | END' USING 'my_variable' AS var_name, 100 AS var_value | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 224 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' ... 
| **NEW** | SUCCESS | SUCCESS | +| 225 | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'default' AS schema, 'col1' AS col | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 226 | DROP TABLE integration_test | **NEW** | SUCCESS | SUCCESS | +| 227 | DROP TABLE integration_test2 | **NEW** | SUCCESS | SUCCESS | + +--- + +## New Tests Added + +### 99 New Tests + +| Query# | SQL Text | identifier-clause | identifier-clause-legacy | +|--------|----------|-------------------|--------------------------| +| 124 | SELECT 1 AS IDENTIFIER('col1') | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 128 | SELECT c1 FROM v | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 133 | SELECT col1 FROM tab | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | +| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 135 | SELECT c2 FROM tab | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 137 | ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 138 | SELECT * FROM tab_renamed | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 140 | DROP TABLE IF EXISTS test_col_with_dot | SUCCESS | SUCCESS | +| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 142 | SELECT 1 AS 
IDENTIFIER('col1.col2') | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | +| 144 | SHOW VIEWS IN IDENTIFIER('default') | SUCCESS | SUCCESS | +| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | +| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | SUCCESS | SUCCESS | +| 147 | DROP TABLE test_show | SUCCESS | SUCCESS | +| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | SUCCESS | SUCCESS | +| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | SUCCESS | SUCCESS | +| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | SUCCESS | SUCCESS | +| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | SUCCESS | SUCCESS | +| 152 | DESC IDENTIFIER('test_desc') | SUCCESS | SUCCESS | +| 153 | DROP TABLE test_desc | SUCCESS | SUCCESS | +| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | +| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | SUCCESS | SUCCESS | +| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 157 | DROP TABLE test_comment | SUCCESS | SUCCESS | +| 158 | CREATE SCHEMA test_schema | SUCCESS | SUCCESS | +| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | SUCCESS | SUCCESS | +| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | SUCCESS | SUCCESS | +| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | +| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | +| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | +| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | +| 165 | DROP SCHEMA test_schema | SUCCESS | SUCCESS | +| 166 | DECLARE IDENTIFIER('my_var') = 'value' | SUCCESS | SUCCESS | +| 167 | SET 
VAR IDENTIFIER('my_var') = 'new_value' | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 168 | SELECT IDENTIFIER('my_var') | SUCCESS | SUCCESS | +| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | SUCCESS | SUCCESS | +| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 171 | SELECT test_udf(5, 'hello') | **SUCCESS** | **UNRESOLVED_ROUTINE** | +| 172 | DROP TEMPORARY FUNCTION test_udf | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('c... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 174 | SELECT * FROM test_table_udf(42) | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | +| 175 | DROP TEMPORARY FUNCTION test_table_udf | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 177 | LEAVE IDENTIFIER('loop_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 178 | END LOOP loop_label | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 179 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 181 | SELECT x | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 182 | END IDENTIFIER('block_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 183 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('c... 
| PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 186 | END WHILE while_label | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 187 | SELECT IDENTIFIER('counter') | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 188 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 192 | SELECT IDENTIFIER('cnt') | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 193 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 195 | END FOR IDENTIFIER('for_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 196 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | +| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1... | SUCCESS | SUCCESS | +| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | +| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | SUCCESS | SUCCESS | +| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, '... | SUCCESS | SUCCESS | +| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_t... | SUCCESS | SUCCESS | +| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS ... 
| SUCCESS | SUCCESS | +| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | SUCCESS | SUCCESS | +| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | SUCCESS | SUCCESS | +| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENT... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY... | SUCCESS | SUCCESS | +| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM inte... | SUCCESS | SUCCESS | +| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:co... | SUCCESS | SUCCESS | +| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:va... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_s... | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_n... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name))... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'te... | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 215 | DROP VIEW test_view | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'int... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(... 
| **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 ... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 219 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)'... | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **PARSE_SYNTAX_ERROR** | +| 220 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.''... | SUCCESS | SUCCESS | +| 221 | EXECUTE IMMEDIATE 'BEGIN DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 222 | SELECT IDENTIFIER(:var_name) AS result | UNBOUND_SQL_PARAMETER | UNBOUND_SQL_PARAMETER | +| 223 | END' USING 'my_variable' AS var_name, 100 AS var_value | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 224 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_al... | SUCCESS | SUCCESS | +| 225 | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'default' AS schema, 'col1... | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 226 | DROP TABLE integration_test | SUCCESS | SUCCESS | +| 227 | DROP TABLE integration_test2 | SUCCESS | SUCCESS | + +--- + +## Tests Changed from Master + +### 13 Tests with Different Behavior from Master + +| Query# | SQL Text | Master | identifier-clause | Notes | +|--------|----------|--------|-------------------|-------| +| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW w... | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** | Behavior changed | +| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIF... 
| PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | +| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | +| 121 | SELECT * FROM IDENTIFIER('s').tab | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | +| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win... | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win')... | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | +| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | + +--- + +## Legacy Mode Differences + +### 47 Tests with Different Behavior in Legacy Mode + +| Query# | identifier-clause | identifier-clause-legacy | SQL Text | +|--------|-------------------|--------------------------|----------| +| 114 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW w... | +| 115 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIF... 
| +| 117 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | +| 118 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | +| 119 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM s.IDENTIFIER('tab') | +| 120 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | +| 121 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM IDENTIFIER('s').tab | +| 122 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win... | +| 123 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win')... | +| 124 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT 1 AS IDENTIFIER('col1') | +| 125 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1... | +| 126 | SUCCESS | PARSE_SYNTAX_ERROR | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | +| 127 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | +| 128 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c1 FROM v | +| 129 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | +| 130 | SUCCESS | PARSE_SYNTAX_ERROR | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | +| 131 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c1 FROM tab | +| 132 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('c... 
| +| 133 | UNRESOLVED_COLUMN.WITH_SUGGESTION | TABLE_OR_VIEW_NOT_FOUND | SELECT col1 FROM tab | +| 134 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | +| 135 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c2 FROM tab | +| 136 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | +| 137 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | +| 138 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT * FROM tab_renamed | +| 139 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | +| 141 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | +| 142 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT 1 AS IDENTIFIER('col1.col2') | +| 156 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | +| 167 | SUCCESS | PARSE_SYNTAX_ERROR | SET VAR IDENTIFIER('my_var') = 'new_value' | +| 170 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('para... | +| 171 | SUCCESS | UNRESOLVED_ROUTINE | SELECT test_udf(5, 'hello') | +| 172 | SUCCESS | ROUTINE_NOT_FOUND | DROP TEMPORARY FUNCTION test_udf | +| 173 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS... | +| 174 | SUCCESS | UNRESOLVABLE_TABLE_VALUED_FUNCTION | SELECT * FROM test_table_udf(42) | +| 175 | SUCCESS | ROUTINE_NOT_FOUND | DROP TEMPORARY FUNCTION test_table_udf | +| 206 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(... | +| 210 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER... 
| +| 211 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1''))... | +| 212 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FR... | +| 213 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(ID... | +| 214 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 't... | +| 215 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | DROP VIEW test_view | +| 216 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_co... | +| 217 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old... | +| 218 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING... | +| 219 | UNRESOLVED_COLUMN.WITH_SUGGESTION | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS... | +| 225 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'defau... 
| + +--- + +## Summary Statistics + +- **Regression Check**: 13 tests changed from master +- **New Functionality**: 99 new tests added +- **Legacy Compatibility**: 47 tests differ between modes (20.7%) +- **Stability**: 115 existing tests unchanged (50.7%) diff --git a/identifier-clause-comparison.csv b/identifier-clause-comparison.csv new file mode 100644 index 000000000000..89c33a385ad9 --- /dev/null +++ b/identifier-clause-comparison.csv @@ -0,0 +1,274 @@ +Query#,SQL Text,Master,identifier-clause,identifier-clause-legacy +1,SET hivevar:colname = 'c',SUCCESS,SUCCESS,SUCCESS +2,SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1),SUCCESS,SUCCESS,SUCCESS +3,SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +4,SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +5,SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +6,SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`),SUCCESS,SUCCESS,SUCCESS +7,SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``),SUCCESS,SUCCESS,SUCCESS +8,SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +9,CREATE SCHEMA IF NOT EXISTS s,SUCCESS,SUCCESS,SUCCESS +10,CREATE TABLE s.tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +11,USE SCHEMA s,SUCCESS,SUCCESS,SUCCESS +12,INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1),SUCCESS,SUCCESS,SUCCESS +13,DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION +14,UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 +15,"MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 + WHEN MATCHED THEN UPDATE SET c1 = 3",_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 +16,SELECT * FROM IDENTIFIER('tab'),SUCCESS,SUCCESS,SUCCESS +17,SELECT * FROM IDENTIFIER('s.tab'),SUCCESS,SUCCESS,SUCCESS +18,SELECT * FROM 
IDENTIFIER('`s`.`tab`'),SUCCESS,SUCCESS,SUCCESS +19,SELECT * FROM IDENTIFIER('t' || 'a' || 'b'),SUCCESS,SUCCESS,SUCCESS +20,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS +21,DROP TABLE s.tab,SUCCESS,SUCCESS,SUCCESS +22,DROP SCHEMA s,SUCCESS,SUCCESS,SUCCESS +23,"SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)",SUCCESS,SUCCESS,SUCCESS +24,SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +25,"SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1)",SUCCESS,SUCCESS,SUCCESS +26,CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +27,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +28,CREATE SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS +29,USE identifier_clauses,SUCCESS,SUCCESS,SUCCESS +30,CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +31,DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab'),SUCCESS,SUCCESS,SUCCESS +32,CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +33,REPLACE TABLE IDENTIFIER('identifier_clauses.' 
|| 'tab')(c1 INT) USING CSV,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION +34,CACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +35,UNCACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +36,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +37,USE default,SUCCESS,SUCCESS,SUCCESS +38,DROP SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS +39,CREATE TABLE tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +40,INSERT INTO tab VALUES (1),SUCCESS,SUCCESS,SUCCESS +41,SELECT c1 FROM tab,SUCCESS,SUCCESS,SUCCESS +42,DESCRIBE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +43,ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS,SUCCESS,SUCCESS,SUCCESS +44,ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT,SUCCESS,SUCCESS,SUCCESS +45,SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +46,SHOW COLUMNS FROM IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +47,COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello',SUCCESS,SUCCESS,SUCCESS +48,REFRESH TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +49,REPAIR TABLE IDENTIFIER('ta' || 'b'),NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE +50,TRUNCATE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS +51,DROP TABLE IF EXISTS tab,SUCCESS,SUCCESS,SUCCESS +52,CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS +53,SELECT * FROM v,SUCCESS,SUCCESS,SUCCESS +54,ALTER VIEW IDENTIFIER('v') AS VALUES(2),SUCCESS,SUCCESS,SUCCESS +55,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS +56,CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS +57,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS +58,CREATE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +59,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue'),SUCCESS,SUCCESS,SUCCESS +60,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 
'someloc',SUCCESS,SUCCESS,SUCCESS +61,COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment',SUCCESS,SUCCESS,SUCCESS +62,DESCRIBE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +63,SHOW TABLES IN IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +64,SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello',SUCCESS,SUCCESS,SUCCESS +65,USE IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +66,SHOW CURRENT SCHEMA,SUCCESS,SUCCESS,SUCCESS +67,USE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +68,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS +69,DROP SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS +70,CREATE SCHEMA ident,SUCCESS,SUCCESS,SUCCESS +71,CREATE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS +72,DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +73,REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +74,DROP FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS +75,DROP SCHEMA ident,SUCCESS,SUCCESS,SUCCESS +76,CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS +77,DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg'),SUCCESS,SUCCESS,SUCCESS +78,DECLARE var = 'sometable',SUCCESS,SUCCESS,SUCCESS +79,CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS +80,SET VAR var = 'c1',SUCCESS,SUCCESS,SUCCESS +81,SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS +82,SET VAR var = 'some',SUCCESS,SUCCESS,SUCCESS +83,DROP TABLE IDENTIFIER(var || 'table'),SUCCESS,SUCCESS,SUCCESS +84,SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`),PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +85,SELECT IDENTIFIER('') FROM VALUES(1) AS T(``),PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT +86,VALUES(IDENTIFIER(CAST(NULL AS STRING))),NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL +87,VALUES(IDENTIFIER(1)),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +88,"VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1)))",NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT +89,SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1),UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE +90,CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +91,CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +92,CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +93,DROP TABLE IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE 
+94,DROP VIEW IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +95,COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello',REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE +96,VALUES(IDENTIFIER(1)()),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE +97,VALUES(IDENTIFIER('a.b.c.d')()),IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS +98,CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE +99,DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg'),INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME +100,CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1),TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS +101,"create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +102,"cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +103,"create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS +104,insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1,SUCCESS,SUCCESS,SUCCESS +105,drop view v1,SUCCESS,SUCCESS,SUCCESS +106,drop table t1,SUCCESS,SUCCESS,SUCCESS +107,drop table t2,SUCCESS,SUCCESS,SUCCESS +108,DECLARE agg = 'max',SUCCESS,SUCCESS,SUCCESS +109,DECLARE col = 'c1',SUCCESS,SUCCESS,SUCCESS +110,DECLARE tab = 'T',SUCCESS,SUCCESS,SUCCESS +111,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS 
(VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab)",SUCCESS,SUCCESS,SUCCESS +112,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), + T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T')",SUCCESS,SUCCESS,SUCCESS +113,"WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) +SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC')",SUCCESS,SUCCESS,SUCCESS +114,SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +115,SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +116,SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1),UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION +117,"SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +118,"SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +119,SELECT * FROM s.IDENTIFIER('tab'),INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +120,SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab'),PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +121,SELECT * FROM IDENTIFIER('s').tab,PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR +122,SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +123,SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +124,SELECT 1 AS IDENTIFIER('col1'),NEW,SUCCESS,PARSE_SYNTAX_ERROR +125,"SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR 
+126,WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +127,CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +128,SELECT c1 FROM v,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +129,CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV,PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +130,INSERT INTO tab(IDENTIFIER('c1')) VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR +131,SELECT c1 FROM tab,SUCCESS,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +132,ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +133,SELECT col1 FROM tab,NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,TABLE_OR_VIEW_NOT_FOUND +134,ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT,NEW,SUCCESS,PARSE_SYNTAX_ERROR +135,SELECT c2 FROM tab,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +136,ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +137,ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed'),NEW,SUCCESS,PARSE_SYNTAX_ERROR +138,SELECT * FROM tab_renamed,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +139,CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV,NEW,SUCCESS,PARSE_SYNTAX_ERROR +140,DROP TABLE IF EXISTS test_col_with_dot,NEW,SUCCESS,SUCCESS +141,"SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +142,SELECT 1 AS IDENTIFIER('col1.col2'),NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +143,"CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +144,SHOW VIEWS IN IDENTIFIER('default'),NEW,SUCCESS,SUCCESS +145,SHOW PARTITIONS IDENTIFIER('test_show'),NEW,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY +146,SHOW CREATE TABLE IDENTIFIER('test_show'),NEW,SUCCESS,SUCCESS 
+147,DROP TABLE test_show,NEW,SUCCESS,SUCCESS +148,CREATE TABLE test_desc(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS +149,DESCRIBE TABLE IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +150,DESCRIBE FORMATTED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +151,DESCRIBE EXTENDED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +152,DESC IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS +153,DROP TABLE test_desc,NEW,SUCCESS,SUCCESS +154,"CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +155,COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment',NEW,SUCCESS,SUCCESS +156,ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment',NEW,SUCCESS,PARSE_SYNTAX_ERROR +157,DROP TABLE test_comment,NEW,SUCCESS,SUCCESS +158,CREATE SCHEMA test_schema,NEW,SUCCESS,SUCCESS +159,CREATE TABLE test_schema.test_table(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS +160,ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS,NEW,SUCCESS,SUCCESS +161,REFRESH TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +162,DESCRIBE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +163,SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +164,DROP TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS +165,DROP SCHEMA test_schema,NEW,SUCCESS,SUCCESS +166,DECLARE IDENTIFIER('my_var') = 'value',NEW,SUCCESS,SUCCESS +167,SET VAR IDENTIFIER('my_var') = 'new_value',NEW,SUCCESS,PARSE_SYNTAX_ERROR +168,SELECT IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS +169,DROP TEMPORARY VARIABLE IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS +170,"CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS INT +RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR +171,"SELECT test_udf(5, 'hello')",NEW,SUCCESS,UNRESOLVED_ROUTINE +172,DROP TEMPORARY FUNCTION test_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND +173,"CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) 
+RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) +RETURN SELECT IDENTIFIER('input_val'), 'result'",NEW,SUCCESS,PARSE_SYNTAX_ERROR +174,SELECT * FROM test_table_udf(42),NEW,SUCCESS,UNRESOLVABLE_TABLE_VALUED_FUNCTION +175,DROP TEMPORARY FUNCTION test_table_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND +176,"BEGIN + IDENTIFIER('loop_label'): LOOP + SELECT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +177,LEAVE IDENTIFIER('loop_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +178,END LOOP loop_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +179,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +180,"BEGIN + block_label: BEGIN + DECLARE IDENTIFIER('x') INT DEFAULT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +181,SELECT x,NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +182,END IDENTIFIER('block_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +183,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +184,"BEGIN + DECLARE IDENTIFIER('counter') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +185,"IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO + SET VAR counter = IDENTIFIER('counter') + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +186,END WHILE while_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +187,SELECT IDENTIFIER('counter'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +188,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +189,"BEGIN + DECLARE IDENTIFIER('cnt') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +190,"repeat_label: REPEAT + SET VAR IDENTIFIER('cnt') = cnt + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +191,"UNTIL IDENTIFIER('cnt') >= 2 + END REPEAT IDENTIFIER('repeat_label')",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +192,SELECT IDENTIFIER('cnt'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION +193,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +194,"BEGIN + IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO + SELECT 
row.c1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +195,END FOR IDENTIFIER('for_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +196,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +197,EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab,NEW,INVALID_EXTRACT_BASE_FIELD_TYPE,INVALID_EXTRACT_BASE_FIELD_TYPE +198,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' + USING 'c1' AS col1",NEW,SUCCESS,SUCCESS +199,"CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS +200,"INSERT INTO integration_test VALUES (1, 'a'), (2, 'b')",NEW,SUCCESS,SUCCESS +201,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' + USING 'default' AS schema, 'integration_test' AS table",NEW,SUCCESS,SUCCESS +202,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' + USING 'c' AS prefix",NEW,SUCCESS,SUCCESS +203,"EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' + USING 'c1' AS col, 1 AS val",NEW,SUCCESS,SUCCESS +204,"CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV",NEW,SUCCESS,SUCCESS +205,"INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y')",NEW,SUCCESS,SUCCESS +206,"EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' + USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col",NEW,SUCCESS,PARSE_SYNTAX_ERROR +207,"EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' + USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord",NEW,SUCCESS,SUCCESS +208,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' + USING 'c' AS prefix, 'count' AS agg, 'c1' AS col",NEW,SUCCESS,SUCCESS +209,"EXECUTE 
IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' + USING 'c1' AS col1, 'c2' AS col2",NEW,SUCCESS,SUCCESS +210,"EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' + USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2",NEW,SUCCESS,PARSE_SYNTAX_ERROR +211,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' + USING 'default' AS schema, 'my_table' AS table, 't' AS alias",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +212,"EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' + USING 'my_cte' AS cte_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR +213,"EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' + USING 'test_view' AS view_name, 'test_col' AS col_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR +214,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_view' AS view",NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +215,DROP VIEW test_view,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND +216,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integration_test' AS tab, 'c4' AS new_col",NEW,SUCCESS,PARSE_SYNTAX_ERROR +217,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' + USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col",NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR +218,"EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS val",NEW,SUCCESS,PARSE_SYNTAX_ERROR +219,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + USING 't' AS alias",NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,PARSE_SYNTAX_ERROR 
+220,"EXECUTE IMMEDIATE + 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' + USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab",NEW,SUCCESS,SUCCESS +221,"EXECUTE IMMEDIATE + 'BEGIN + DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +222,SELECT IDENTIFIER(:var_name) AS result,NEW,UNBOUND_SQL_PARAMETER,UNBOUND_SQL_PARAMETER +223,"END' + USING 'my_variable' AS var_name, 100 AS var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR +224,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' + USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias",NEW,SUCCESS,SUCCESS +225,"EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' AS col",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR +226,DROP TABLE integration_test,NEW,SUCCESS,SUCCESS +227,DROP TABLE integration_test2,NEW,SUCCESS,SUCCESS diff --git a/identifier-clause-comparison.md b/identifier-clause-comparison.md new file mode 100644 index 000000000000..0011e5e0dd1d --- /dev/null +++ b/identifier-clause-comparison.md @@ -0,0 +1,248 @@ +# IDENTIFIER Clause Test Comparison + +Comprehensive comparison of IDENTIFIER clause behavior across different modes. 
+
+- **Total Tests**: 227
+- **Tests from Master**: 128
+- **New Tests Added**: 99
+- **Tests Changed from Master**: 13
+- **Tests with Legacy Mode Differences**: 47
+
+## Legend
+
+- **Query#**: Sequential test number
+- **SQL Text**: The SQL query being tested
+- **Master**: Result from master branch (before identifier-lite changes)
+- **identifier-clause**: Result with identifier-lite enabled (default mode, current)
+- **identifier-clause-legacy**: Result with `spark.sql.legacy.identifierClause=true`
+
+### Result Values
+
+- `SUCCESS`: Query executed successfully
+- `<ERROR_CLASS>`: Query failed with the specified error class
+- `NEW`: Test did not exist in master baseline
+
+---
+
+## Full Test Results
+
+| Query# | SQL Text | Master | identifier-clause | identifier-clause-legacy |
+|--------|----------|--------|-------------------|--------------------------|
+| 1 | SET hivevar:colname = 'c' | SUCCESS | SUCCESS | SUCCESS |
+| 2 | SELECT IDENTIFIER(${colname} \|\| '_1') FROM VALUES(1) AS T(c_1) | SUCCESS | SUCCESS | SUCCESS |
+| 3 | SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 4 | SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 5 | SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 6 | SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`) | SUCCESS | SUCCESS | SUCCESS |
+| 7 | SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) | SUCCESS | SUCCESS | SUCCESS |
+| 8 | SELECT IDENTIFIER('c' \|\| '1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS |
+| 9 | CREATE SCHEMA IF NOT EXISTS s | SUCCESS | SUCCESS | SUCCESS |
+| 10 | CREATE TABLE s.tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS |
+| 11 | USE SCHEMA s | SUCCESS | SUCCESS | SUCCESS |
+| 12 | INSERT INTO IDENTIFIER('ta' \|\| 'b') VALUES(1) | SUCCESS | SUCCESS | SUCCESS |
+| 13 | DELETE FROM IDENTIFIER('ta' \|\| 'b') WHERE 1=0 | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | 
UNSUPPORTED_FEATURE.TABLE_OPERATION | +| 14 | UPDATE IDENTIFIER('ta' \|\| 'b') SET c1 = 2 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | +| 15 | MERGE INTO IDENTIFIER('ta' \|\| 'b') AS t USING IDENTIFIER('ta' \|\| 'b') AS s ON s.c1 = t.c1 WHEN MATCHED THEN UPD... | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | +| 16 | SELECT * FROM IDENTIFIER('tab') | SUCCESS | SUCCESS | SUCCESS | +| 17 | SELECT * FROM IDENTIFIER('s.tab') | SUCCESS | SUCCESS | SUCCESS | +| 18 | SELECT * FROM IDENTIFIER('`s`.`tab`') | SUCCESS | SUCCESS | SUCCESS | +| 19 | SELECT * FROM IDENTIFIER('t' \|\| 'a' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 20 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | +| 21 | DROP TABLE s.tab | SUCCESS | SUCCESS | SUCCESS | +| 22 | DROP SCHEMA s | SUCCESS | SUCCESS | SUCCESS | +| 23 | SELECT IDENTIFIER('COAL' \|\| 'ESCE')(NULL, 1) | SUCCESS | SUCCESS | SUCCESS | +| 24 | SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | +| 25 | SELECT * FROM IDENTIFIER('ra' \|\| 'nge')(0, 1) | SUCCESS | SUCCESS | SUCCESS | +| 26 | CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 27 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 28 | CREATE SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 29 | USE identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 30 | CREATE TABLE IDENTIFIER('ta' \|\| 'b')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 31 | DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' \|\| 'tab') | SUCCESS | SUCCESS | SUCCESS | +| 32 | CREATE TABLE IDENTIFIER('identifier_clauses.' \|\| 'tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 33 | REPLACE TABLE IDENTIFIER('identifier_clauses.' 
\|\| 'tab')(c1 INT) USING CSV | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | +| 34 | CACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 35 | UNCACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 36 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 37 | USE default | SUCCESS | SUCCESS | SUCCESS | +| 38 | DROP SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | +| 39 | CREATE TABLE tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 40 | INSERT INTO tab VALUES (1) | SUCCESS | SUCCESS | SUCCESS | +| 41 | SELECT c1 FROM tab | SUCCESS | SUCCESS | SUCCESS | +| 42 | DESCRIBE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 43 | ANALYZE TABLE IDENTIFIER('ta' \|\| 'b') COMPUTE STATISTICS | SUCCESS | SUCCESS | SUCCESS | +| 44 | ALTER TABLE IDENTIFIER('ta' \|\| 'b') ADD COLUMN c2 INT | SUCCESS | SUCCESS | SUCCESS | +| 45 | SHOW TBLPROPERTIES IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 46 | SHOW COLUMNS FROM IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 47 | COMMENT ON TABLE IDENTIFIER('ta' \|\| 'b') IS 'hello' | SUCCESS | SUCCESS | SUCCESS | +| 48 | REFRESH TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 49 | REPAIR TABLE IDENTIFIER('ta' \|\| 'b') | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | +| 50 | TRUNCATE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | +| 51 | DROP TABLE IF EXISTS tab | SUCCESS | SUCCESS | SUCCESS | +| 52 | CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | +| 53 | SELECT * FROM v | SUCCESS | SUCCESS | SUCCESS | +| 54 | ALTER VIEW IDENTIFIER('v') AS VALUES(2) | SUCCESS | SUCCESS | SUCCESS | +| 55 | DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | +| 56 | CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | +| 57 
| DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | +| 58 | CREATE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 59 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET PROPERTIES (somekey = 'somevalue') | SUCCESS | SUCCESS | SUCCESS | +| 60 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET LOCATION 'someloc' | SUCCESS | SUCCESS | SUCCESS | +| 61 | COMMENT ON SCHEMA IDENTIFIER('id' \|\| 'ent') IS 'some comment' | SUCCESS | SUCCESS | SUCCESS | +| 62 | DESCRIBE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 63 | SHOW TABLES IN IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 64 | SHOW TABLE EXTENDED IN IDENTIFIER('id' \|\| 'ent') LIKE 'hello' | SUCCESS | SUCCESS | SUCCESS | +| 65 | USE IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 66 | SHOW CURRENT SCHEMA | SUCCESS | SUCCESS | SUCCESS | +| 67 | USE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 68 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | +| 69 | DROP SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | +| 70 | CREATE SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | +| 71 | CREATE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | +| 72 | DESCRIBE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 73 | REFRESH FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 74 | DROP FUNCTION IDENTIFIER('ident.' 
\|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 75 | DROP SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | +| 76 | CREATE TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | +| 77 | DROP TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') | SUCCESS | SUCCESS | SUCCESS | +| 78 | DECLARE var = 'sometable' | SUCCESS | SUCCESS | SUCCESS | +| 79 | CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | +| 80 | SET VAR var = 'c1' | SUCCESS | SUCCESS | SUCCESS | +| 81 | SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | +| 82 | SET VAR var = 'some' | SUCCESS | SUCCESS | SUCCESS | +| 83 | DROP TABLE IDENTIFIER(var \|\| 'table') | SUCCESS | SUCCESS | SUCCESS | +| 84 | SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 85 | SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | +| 86 | VALUES(IDENTIFIER(CAST(NULL AS STRING))) | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | +| 87 | VALUES(IDENTIFIER(1)) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 88 | VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | +| 89 | SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | +| 90 | CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 91 | CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 92 | CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) | 
REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 93 | DROP TABLE IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 94 | DROP VIEW IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 95 | COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | +| 96 | VALUES(IDENTIFIER(1)()) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | +| 97 | VALUES(IDENTIFIER('a.b.c.d')()) | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | +| 98 | CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | +| 99 | DROP TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | +| 100 | CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | +| 101 | create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 102 | cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 103 | create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | +| 104 | insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 
| SUCCESS | SUCCESS | SUCCESS | +| 105 | drop view v1 | SUCCESS | SUCCESS | SUCCESS | +| 106 | drop table t1 | SUCCESS | SUCCESS | SUCCESS | +| 107 | drop table t2 | SUCCESS | SUCCESS | SUCCESS | +| 108 | DECLARE agg = 'max' | SUCCESS | SUCCESS | SUCCESS | +| 109 | DECLARE col = 'c1' | SUCCESS | SUCCESS | SUCCESS | +| 110 | DECLARE tab = 'T' | SUCCESS | SUCCESS | SUCCESS | +| 111 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER(agg)(ID... | SUCCESS | SUCCESS | SUCCESS | +| 112 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER('max')(... | SUCCESS | SUCCESS | SUCCESS | +| 113 | WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' \|\| 'BC') | SUCCESS | SUCCESS | SUCCESS | +| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 116 | SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | +| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 121 | SELECT * FROM IDENTIFIER('s').tab | 
PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 124 | SELECT 1 AS IDENTIFIER('col1') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 128 | SELECT c1 FROM v | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | +| 131 | SELECT c1 FROM tab | SUCCESS | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 133 | SELECT col1 FROM tab | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | +| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 135 | SELECT c2 FROM tab | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 137 | 
ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 138 | SELECT * FROM tab_renamed | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 140 | DROP TABLE IF EXISTS test_col_with_dot | **NEW** | SUCCESS | SUCCESS | +| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 142 | SELECT 1 AS IDENTIFIER('col1.col2') | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 144 | SHOW VIEWS IN IDENTIFIER('default') | **NEW** | SUCCESS | SUCCESS | +| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | **NEW** | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | +| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | **NEW** | SUCCESS | SUCCESS | +| 147 | DROP TABLE test_show | **NEW** | SUCCESS | SUCCESS | +| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 152 | DESC IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | +| 153 | DROP TABLE test_desc | **NEW** | SUCCESS | SUCCESS | +| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | **NEW** | SUCCESS | SUCCESS | +| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 157 | DROP TABLE 
test_comment | **NEW** | SUCCESS | SUCCESS | +| 158 | CREATE SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | +| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | **NEW** | SUCCESS | SUCCESS | +| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | +| 165 | DROP SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | +| 166 | DECLARE IDENTIFIER('my_var') = 'value' | **NEW** | SUCCESS | SUCCESS | +| 167 | SET VAR IDENTIFIER('my_var') = 'new_value' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 168 | SELECT IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | +| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | +| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS INT RETURN IDENTIFI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 171 | SELECT test_udf(5, 'hello') | **NEW** | **SUCCESS** | **UNRESOLVED_ROUTINE** | +| 172 | DROP TEMPORARY FUNCTION test_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIE... 
| **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 174 | SELECT * FROM test_table_udf(42) | **NEW** | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | +| 175 | DROP TEMPORARY FUNCTION test_table_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | +| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 177 | LEAVE IDENTIFIER('loop_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 178 | END LOOP loop_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 179 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 181 | SELECT x | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 182 | END IDENTIFIER('block_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 183 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('counter') + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 186 | END WHILE while_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 187 | SELECT IDENTIFIER('counter') | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 188 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 192 | SELECT IDENTIFIER('cnt') | **NEW** | 
UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | +| 193 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 195 | END FOR IDENTIFIER('for_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 196 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | +| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | **NEW** | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | +| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' USING 'c1' AS col1 | **NEW** | SUCCESS | SUCCESS | +| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | **NEW** | SUCCESS | SUCCESS | +| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, 'integration_test' AS... | **NEW** | SUCCESS | SUCCESS | +| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' USING 'c' AS ... | **NEW** | SUCCESS | SUCCESS | +| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS col, 1 AS val | **NEW** | SUCCESS | SUCCESS | +| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | +| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | **NEW** | SUCCESS | SUCCESS | +| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' USIN... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) O... 
| **NEW** | SUCCESS | SUCCESS | +| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP B... | **NEW** | SUCCESS | SUCCESS | +| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' USING 'c1' AS... | **NEW** | SUCCESS | SUCCESS | +| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' USING ... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) ... | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | +| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' USING 'my_ct... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' USI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'test_view' AS view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 215 | DROP VIEW test_view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | +| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'integration_test' AS ta... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | +| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' USING '... 
| **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | +| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 AS val | ** \ No newline at end of file diff --git a/identifier-clause-differences.txt b/identifier-clause-differences.txt new file mode 100644 index 000000000000..ffa9dffcac89 --- /dev/null +++ b/identifier-clause-differences.txt @@ -0,0 +1,364 @@ +======================================================================================================================== +IDENTIFIER CLAUSE: DETAILED COMPARISON OF DIFFERENCES +======================================================================================================================== + +Total tests: 227 +Tests with different behavior: 47 +Tests with identical behavior: 180 + +======================================================================================================================== +TESTS WITH DIFFERENT BEHAVIOR +======================================================================================================================== + +Query #114: + SQL: SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) + Master: NEW + identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #115: + SQL: SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #117: + SQL: SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #118: + SQL: SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #119: + SQL: SELECT * FROM s.IDENTIFIER('tab') + Master: NEW + identifier-clause: TABLE_OR_VIEW_NOT_FOUND + 
identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #120: + SQL: SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') + Master: NEW + identifier-clause: TABLE_OR_VIEW_NOT_FOUND + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #121: + SQL: SELECT * FROM IDENTIFIER('s').tab + Master: NEW + identifier-clause: TABLE_OR_VIEW_NOT_FOUND + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #122: + SQL: SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #123: + SQL: SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #124: + SQL: SELECT 1 AS IDENTIFIER('col1') + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #125: + SQL: SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #126: + SQL: WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #127: + SQL: CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #128: + SQL: SELECT c1 FROM v + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #129: + SQL: CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #130: + SQL: INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #131: + SQL: SELECT c1 FROM tab + 
Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #132: + SQL: ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') + Master: NEW + identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #133: + SQL: SELECT col1 FROM tab + Master: NEW + identifier-clause: UNRESOLVED_COLUMN.WITH_SUGGESTION + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #134: + SQL: ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #135: + SQL: SELECT c2 FROM tab + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #136: + SQL: ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') + Master: NEW + identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #137: + SQL: ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #138: + SQL: SELECT * FROM tab_renamed + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #139: + SQL: CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #141: + SQL: SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) + Master: NEW + identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #142: + SQL: SELECT 1 AS IDENTIFIER('col1.col2') + Master: NEW + identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #156: + SQL: ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' + Master: NEW + 
identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #167: + SQL: SET VAR IDENTIFIER('my_var') = 'new_value' + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #170: + SQL: CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) +RETURNS IN... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #171: + SQL: SELECT test_udf(5, 'hello') + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: UNRESOLVED_ROUTINE + +Query #172: + SQL: DROP TEMPORARY FUNCTION test_udf + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: ROUTINE_NOT_FOUND + +Query #173: + SQL: CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +RETURNS TABLE(IDENTIFIER('col1... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #174: + SQL: SELECT * FROM test_table_udf(42) + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: UNRESOLVABLE_TABLE_VALUED_FUNCTION + +Query #175: + SQL: DROP TEMPORARY FUNCTION test_table_udf + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: ROUTINE_NOT_FOUND + +Query #206: + SQL: EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFI... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #210: + SQL: EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1,... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #211: + SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_stru... 
+ Master: NEW + identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #212: + SQL: EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #213: + SQL: EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #214: + SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + USING 'test_col' AS col, 'test_... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #215: + SQL: DROP VIEW test_view + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND + +Query #216: + SQL: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' + USING 'integr... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #217: + SQL: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:ne... + Master: NEW + identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #218: + SQL: EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' + USING 'mykey' AS key, 42 AS ... + Master: NEW + identifier-clause: SUCCESS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #219: + SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' + ... + Master: NEW + identifier-clause: UNRESOLVED_COLUMN.WITH_SUGGESTION + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +Query #225: + SQL: EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING 'default' AS schema, 'col1' A... 
+ Master: NEW + identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS + identifier-clause-legacy: PARSE_SYNTAX_ERROR + +======================================================================================================================== +PATTERN ANALYSIS +======================================================================================================================== + + +Parse errors → Success: 24 tests +-------------------------------------------------------------------------------- + #115: SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (... + #117: SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)... + #118: SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)... + #122: SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WIN... + #123: SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER... + #124: SELECT 1 AS IDENTIFIER('col1')... + #125: SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIF... + #126: WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM ... + #127: CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1)... + #129: CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV... + #130: INSERT INTO tab(IDENTIFIER('c1')) VALUES(1)... + #134: ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT... + #137: ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed')... + #139: CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING... + #156: ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column... + #167: SET VAR IDENTIFIER('my_var') = 'new_value'... + #170: CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIE... + #173: CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) +... + #206: EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDEN... + #210: EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDE... 
+ #212: EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELEC... + #213: EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_n... + #216: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(... + #218: EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' +... + +Parse errors → Different error: 12 tests +-------------------------------------------------------------------------------- + #114: SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) W... + #119: SELECT * FROM s.IDENTIFIER('tab')... + #120: SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab')... + #121: SELECT * FROM IDENTIFIER('s').tab... + #132: ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTI... + #136: ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2')... + #141: SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)... + #142: SELECT 1 AS IDENTIFIER('col1.col2')... + #211: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''... + #217: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFI... + #219: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_... + #225: EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' + USING... + +Success → Parse errors: 10 tests +-------------------------------------------------------------------------------- + #128: SELECT c1 FROM v... + #131: SELECT c1 FROM tab... + #135: SELECT c2 FROM tab... + #138: SELECT * FROM tab_renamed... + #171: SELECT test_udf(5, 'hello')... + #172: DROP TEMPORARY FUNCTION test_udf... + #174: SELECT * FROM test_table_udf(42)... + #175: DROP TEMPORARY FUNCTION test_table_udf... + #214: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' + U... + #215: DROP VIEW test_view... 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index cf468797932c..b2de673286a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -321,6 +321,20 @@ class SqlScriptingLabelContext { private def checkLabels( beginLabelCtx: Option[BeginLabelContext], endLabelCtx: Option[EndLabelContext]): Unit = { + // First, check if the begin label is qualified (if it exists). + beginLabelCtx.foreach { bl => + val resolvedLabel = ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) + if (bl.multipartIdentifier().parts.size() > 1 || resolvedLabel.contains(".")) { + withOrigin(bl) { + throw SqlScriptingErrors.labelCannotBeQualified( + CurrentOrigin.get, + resolvedLabel.toLowerCase(Locale.ROOT) + ) + } + } + } + + // Then, check label matching and other constraints. (beginLabelCtx, endLabelCtx) match { // Throw an error if labels do not match. case (Some(bl: BeginLabelContext), Some(el: EndLabelContext)) => @@ -336,16 +350,6 @@ class SqlScriptingLabelContext { ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) } } - // Throw an error if label is qualified. - case (Some(bl: BeginLabelContext), _) - if bl.multipartIdentifier().parts.size() > 1 => - withOrigin(bl) { - throw SqlScriptingErrors.labelCannotBeQualified( - CurrentOrigin.get, - ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) - .toLowerCase(Locale.ROOT) - ) - } // Throw an error if end label exists without begin label. 
case (None, Some(el: EndLabelContext)) => withOrigin(el) { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out new file mode 100644 index 000000000000..1271f730d1e5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out @@ -0,0 +1,1226 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES + (10, 'name1', named_struct('f1', 1, 's2', named_struct('f2', 101, 'f3', 'a'))), + (20, 'name2', named_struct('f1', 2, 's2', named_struct('f2', 202, 'f3', 'b'))), + (30, 'name3', named_struct('f1', 3, 's2', named_struct('f2', 303, 'f3', 'c'))), + (40, 'name4', named_struct('f1', 4, 's2', named_struct('f2', 404, 'f3', 'd'))), + (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))), + (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))), + (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g'))) +AS tbl_view(id, name, data) +-- !query analysis +CreateViewCommand `tbl_view`, SELECT * FROM VALUES + (10, 'name1', named_struct('f1', 1, 's2', named_struct('f2', 101, 'f3', 'a'))), + (20, 'name2', named_struct('f1', 2, 's2', named_struct('f2', 202, 'f3', 'b'))), + (30, 'name3', named_struct('f1', 3, 's2', named_struct('f2', 303, 'f3', 'c'))), + (40, 'name4', named_struct('f1', 4, 's2', named_struct('f2', 404, 'f3', 'd'))), + (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))), + (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))), + (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g'))) +AS tbl_view(id, name, data), false, false, LocalTempView, UNSUPPORTED, true + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, 
name#x, data#x] + + +-- !query +CREATE TABLE x (id INT) USING csv +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`x`, false + + +-- !query +DECLARE sql_string STRING +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.sql_string + + +-- !query +SET VAR sql_string = 'SELECT * from tbl_view where name = \'name1\'' +-- !query analysis +SetVariable [variablereference(system.session.sql_string=CAST(NULL AS STRING))] ++- Project [SELECT * from tbl_view where name = 'name1' AS sql_string#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SET spark.sql.ansi.enabled=true' +-- !query analysis +CommandResult [key#x, value#x], Execute SetCommand, [[spark.sql.ansi.enabled,true]] + +- SetCommand (spark.sql.ansi.enabled,Some(true)) + + +-- !query +EXECUTE IMMEDIATE 'CREATE TEMPORARY VIEW IDENTIFIER(:tblName) AS SELECT id, name FROM tbl_view' USING 'tbl_view_tmp' as tblName +-- !query analysis +CommandResult Execute CreateViewCommand + +- CreateViewCommand `tbl_view_tmp`, SELECT id, name FROM tbl_view, false, false, LocalTempView, UNSUPPORTED, true + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view_tmp' +-- !query analysis +Project [id#x, name#x] ++- SubqueryAlias tbl_view_tmp + +- View (`tbl_view_tmp`, [id#x, name#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] + +- Project [id#x, name#x] + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + 
+- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'REFRESH TABLE IDENTIFIER(:tblName)' USING 'x' as tblName +-- !query analysis +CommandResult Execute RefreshTableCommand + +- RefreshTableCommand `spark_catalog`.`default`.`x` + + +-- !query +EXECUTE IMMEDIATE sql_string +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter (name#x = name1) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = \'name1\'' +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter (name#x = name1) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SET VAR sql_string = 'SELECT * from tbl_view where name = ? or name = ?' +-- !query analysis +SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = \'name1\'')] ++- Project [SELECT * from tbl_view where name = ? or name = ? 
AS sql_string#x] + +- OneRowRelation + + +-- !query +DECLARE a STRING +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a + + +-- !query +SET VAR a = 'name1' +-- !query analysis +SetVariable [variablereference(system.session.a=CAST(NULL AS STRING))] ++- Project [name1 AS a#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE sql_string USING 'name1', 'name3' +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name1) OR (name#x = name3)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE sql_string USING a, 'name2' +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name1) OR (name#x = name2)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING 'name1', 'name3' +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name1) OR (name#x = name3)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' 
USING a, 'name2' +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name1) OR (name#x = name2)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING (a, 'name2') +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name1) OR (name#x = name2)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO x VALUES(?)' USING 1 +-- !query analysis +CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/x, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/x], Append, `spark_catalog`.`default`.`x`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/x), [id] + +- InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/x, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/x], Append, `spark_catalog`.`default`.`x`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/x), [id] + +- Project [col1#x AS id#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT * from x +-- !query analysis +Project [id#x] ++- SubqueryAlias spark_catalog.default.x + +- Relation spark_catalog.default.x[id#x] csv + + +-- !query +SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' +-- !query analysis 
+SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = ? or name = ?')] ++- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] + +- OneRowRelation + + +-- !query +DECLARE b INT +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.b + + +-- !query +SET VAR b = 40 +-- !query analysis +SetVariable [variablereference(system.session.b=CAST(NULL AS INT))] ++- Project [40 AS b#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE sql_string USING 40 as second, 'name7' as first +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name7) OR (id#x = 40)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE sql_string USING b as second, 'name7' as first +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name7) OR (id#x = 40)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 40 as second, 'name7' as first +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name7) OR (id#x = 40)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- 
SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 'name7' as first, b as second +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((name#x = name7) OR (id#x = 40)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT tbl_view.*, :first as p FROM tbl_view WHERE name = :first' USING 'name7' as first +-- !query analysis +Project [id#x, name#x, data#x, name7 AS p#x] ++- Filter (name#x = name7) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SET VAR sql_string = ?' 
USING 'SELECT id from tbl_view where name = :first' +-- !query analysis +CommandResult SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] + +- SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] + +- Project [SELECT id from tbl_view where name = :first AS sql_string#x] + +- OneRowRelation + + +-- !query +SELECT sql_string +-- !query analysis +Project [variablereference(system.session.sql_string='SELECT id from tbl_view where name = :first') AS sql_string#x] ++- OneRowRelation + + +-- !query +DECLARE res_id INT +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.res_id + + +-- !query +EXECUTE IMMEDIATE sql_string INTO res_id USING 'name7' as first +-- !query analysis +SetVariable [variablereference(system.session.res_id=CAST(NULL AS INT))] ++- GlobalLimit 2 + +- LocalLimit 2 + +- Project [id#x] + +- Filter (name#x = name7) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT res_id +-- !query analysis +Project [variablereference(system.session.res_id=70) AS res_id#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE sql_string INTO res_id USING a as first +-- !query analysis +SetVariable [variablereference(system.session.res_id=70)] ++- GlobalLimit 2 + +- LocalLimit 2 + +- Project [id#x] + +- Filter (name#x = name1) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- 
SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT res_id +-- !query analysis +Project [variablereference(system.session.res_id=10) AS res_id#x] ++- OneRowRelation + + +-- !query +SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' +-- !query analysis +SetVariable [variablereference(system.session.sql_string='SELECT id from tbl_view where name = :first')] ++- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT 42' INTO res_id +-- !query analysis +SetVariable [variablereference(system.session.res_id=10)] ++- Project [42 AS 42#x] + +- OneRowRelation + + +-- !query +SELECT res_id +-- !query analysis +Project [variablereference(system.session.res_id=42) AS res_id#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO b, a USING 10 +-- !query analysis +SetVariable [variablereference(system.session.b=40), variablereference(system.session.a='name1')] ++- GlobalLimit 2 + +- LocalLimit 2 + +- Project [id#x, name#x] + +- Filter (id#x = 10) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SELECT b, a +-- !query analysis +Project [variablereference(system.session.b=10) AS b#x, variablereference(system.session.a='name1') AS a#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where id = ? AND name = ?' 
USING b as first, a +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter ((id#x = 10) AND (name#x = name1)) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT 42 WHERE 2 = 1' INTO res_id +-- !query analysis +SetVariable [variablereference(system.session.res_id=42)] ++- Project [42 AS 42#x] + +- Filter (2 = 1) + +- OneRowRelation + + +-- !query +SELECT res_id +-- !query analysis +Project [variablereference(system.session.res_id=CAST(NULL AS INT)) AS res_id#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT \'1707\'' INTO res_id +-- !query analysis +SetVariable [variablereference(system.session.res_id=CAST(NULL AS INT))] ++- Project [cast(1707#x as int) AS res_id#x] + +- Project [1707 AS 1707#x] + +- OneRowRelation + + +-- !query +SELECT res_id +-- !query analysis +Project [variablereference(system.session.res_id=1707) AS res_id#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT \'invalid_cast_error_expected\'' INTO res_id +-- !query analysis +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'invalid_cast_error_expected'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 70, + "fragment" : "EXECUTE IMMEDIATE 'SELECT \\'invalid_cast_error_expected\\'' INTO res_id" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'INSERT INTO x VALUES (?)' INTO res_id USING 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_STATEMENT_FOR_EXECUTE_INTO", + "sqlState" : "07501", + 
"messageParameters" : { + "sqlString" : "INSERT INTO X VALUES (?)" + } +} + + +-- !query +DECLARE OR REPLACE testvarA INT +-- !query analysis +CreateVariable defaultvalueexpression(null, null), true ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.testvarA + + +-- !query +EXECUTE IMMEDIATE 'SET VAR testVarA = 1' INTO testVarA +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_STATEMENT_FOR_EXECUTE_INTO", + "sqlState" : "07501", + "messageParameters" : { + "sqlString" : "SET VAR TESTVARA = 1" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view WHERE ? = id' USING id +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`id`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 63, + "stopIndex" : 64, + "fragment" : "id" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where ? 
= id and :first = name' USING 1 as x, 'name2' as first +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_QUERY_MIXED_QUERY_PARAMETERS", + "sqlState" : "42613" +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where :x = id and :first = name' USING 1, 'name2' as first +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", + "sqlState" : "07001", + "messageParameters" : { + "exprs" : "\"1\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where :first = name' USING 1, 'name2' as first +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", + "sqlState" : "07001", + "messageParameters" : { + "exprs" : "\"1\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELCT Fa' +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'SELCT'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 8, + "fragment" : "SELCT Fa" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELCT Fa' INTO res_id +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'SELCT'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 8, + "fragment" : "SELCT Fa" + } ] +} + + +-- !query +EXECUTE IMMEDIATE b +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", + "sqlState" : "42K09", + "messageParameters" : { + "exprType" : "\"INT\"" + } +} + + +-- !query +SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' +-- !query analysis 
+SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] ++- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] + +- OneRowRelation + + +-- !query +SET VAR a = 'na' +-- !query analysis +SetVariable [variablereference(system.session.a='name1')] ++- Project [na AS a#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING CONCAT(a , "me1") as first +-- !query analysis +Project [id#x, name#x, data#x] ++- Filter (name#x = name1) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING (SELECT 42) as first, 'name2' as second +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_EXPR_FOR_PARAMETER", + "sqlState" : "42K0E", + "messageParameters" : { + "invalidExprSql" : "\"scalarsubquery()\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 70, + "stopIndex" : 80, + "fragment" : "(SELECT 42)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO a, b USING 10 +-- !query analysis +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'name1'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 81, + "fragment" : "EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' 
INTO a, b USING 10" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO (a, b) USING 10 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id +-- !query analysis +org.apache.spark.SparkException +{ + "errorClass" : "ROW_SUBQUERY_TOO_MANY_ROWS", + "sqlState" : "21000" +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view' INTO res_id +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ASSIGNMENT_ARITY_MISMATCH", + "sqlState" : "42802", + "messageParameters" : { + "numExpr" : "2", + "numTarget" : "1" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id, b +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ASSIGNMENT_ARITY_MISMATCH", + "sqlState" : "42802", + "messageParameters" : { + "numExpr" : "1", + "numTarget" : "2" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :first' USING 10 as first, 20 as first +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "EXEC_IMMEDIATE_DUPLICATE_ARGUMENT_ALIASES", + "sqlState" : "42701", + "messageParameters" : { + "aliases" : "`first`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 63, + "stopIndex" : 92, + "fragment" : "USING 10 as first, 20 as first" + } ] +} + + +-- !query +DECLARE p = 10 +-- !query analysis +CreateVariable defaultvalueexpression(10, 10), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.p + + +-- !query +EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p +-- !query analysis +Project [id#x] ++- Filter (id#x = 10) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, 
name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p, 'p' +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", + "sqlState" : "07001", + "messageParameters" : { + "exprs" : "\"p\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view WHERE id = 10' INTO res_id, res_id +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "DUPLICATE_ASSIGNMENTS", + "sqlState" : "42701", + "messageParameters" : { + "nameList" : "`res_id`" + } +} + + +-- !query +EXECUTE IMMEDIATE 'EXECUTE IMMEDIATE \'SELECT id FROM tbl_view WHERE id = ?\' USING 10' +-- !query analysis +Project [id#x] ++- Filter (id#x = 10) + +- SubqueryAlias tbl_view + +- View (`tbl_view`, [id#x, name#x, data#x]) + +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] + +- Project [id#x, name#x, data#x] + +- SubqueryAlias tbl_view + +- LocalRelation [id#x, name#x, data#x] + + +-- !query +SET VAR sql_string = null +-- !query analysis +SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] ++- Project [cast(sql_string#x as string) AS sql_string#x] + +- Project [null AS sql_string#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE sql_string +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NULL_QUERY_STRING_EXECUTE_IMMEDIATE", + "sqlState" : "22004", + "messageParameters" : { + "varName" : "`sql_string`" + } +} + + +-- !query +SET VAR sql_string = 5 +-- !query analysis +SetVariable [variablereference(system.session.sql_string=CAST(NULL AS STRING))] ++- Project [cast(sql_string#x as string) AS 
sql_string#x] + +- Project [5 AS sql_string#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE sql_string +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'5'", + "hint" : "" + }, + "queryContext" : [ { + "objectType" : "EXECUTE IMMEDIATE", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 1, + "fragment" : "5" + } ] +} + + +-- !query +SET VAR sql_string = 'hello' +-- !query analysis +SetVariable [variablereference(system.session.sql_string='5')] ++- Project [hello AS sql_string#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE length(sql_string) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", + "sqlState" : "42K09", + "messageParameters" : { + "exprType" : "\"INT\"" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT 42 where ? = :first' USING 1, 2 as first +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_QUERY_MIXED_QUERY_PARAMETERS", + "sqlState" : "42613" +} + + +-- !query +DECLARE int_var INT +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.int_var + + +-- !query +SET VAR int_var = 42 +-- !query analysis +SetVariable [variablereference(system.session.int_var=CAST(NULL AS INT))] ++- Project [42 AS int_var#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE int_var +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", + "sqlState" : "42K09", + "messageParameters" : { + "exprType" : "\"INT\"" + } +} + + +-- !query +DECLARE null_var STRING +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.null_var + + +-- !query +SET VAR null_var = null +-- !query analysis +SetVariable [variablereference(system.session.null_var=CAST(NULL AS STRING))] ++- Project [cast(null_var#x as string) AS null_var#x] + +- Project [null AS null_var#x] + +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE null_var +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "NULL_QUERY_STRING_EXECUTE_IMMEDIATE", + "sqlState" : "22004", + "messageParameters" : { + "varName" : "`null_var`" + } +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT ?' USING (SELECT 1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_EXPR_FOR_PARAMETER", + "sqlState" : "42K0E", + "messageParameters" : { + "invalidExprSql" : "\"scalarsubquery()\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 36, + "stopIndex" : 45, + "fragment" : "(SELECT 1)" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT :first' USING 2, 3 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", + "sqlState" : "07001", + "messageParameters" : { + "exprs" : "\"2\", \"3\"" + } +} + + +-- !query +EXECUTE IMMEDIATE (SELECT c FROM (VALUES(1)) AS T(c)) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", + "sqlState" : "42K09", + "messageParameters" : { + "exprType" : "\"INT\"" + } +} + + +-- !query +DROP TABLE x +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.x + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5 AS p +-- !query analysis +Project [typeof(5) AS type#x, 5 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5L AS p +-- !query analysis +Project [typeof(5) AS type#x, 5 AS val#xL] ++- 
OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5S AS p +-- !query analysis +Project [typeof(5) AS type#x, 5 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5Y AS p +-- !query analysis +Project [typeof(5) AS type#x, 5 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 3.14F AS p +-- !query analysis +Project [typeof(cast(3.14 as float)) AS type#x, cast(3.14 as float) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 3.14159D AS p +-- !query analysis +Project [typeof(3.14159) AS type#x, 3.14159 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 123.45BD AS p +-- !query analysis +Project [typeof(123.45) AS type#x, 123.45 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING true AS p +-- !query analysis +Project [typeof(true) AS type#x, true AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING false AS p +-- !query analysis +Project [typeof(false) AS type#x, false AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 'hello world' AS p +-- !query analysis +Project [typeof(hello world) AS type#x, hello world AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 'it''s a test' AS p +-- !query analysis +Project [typeof(it's a test) AS type#x, it's a test AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING DATE '2023-12-25' AS p +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING TIMESTAMP '2023-12-25 10:30:45' AS p +-- !query 
analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING TIMESTAMP_NTZ '2023-12-25 10:30:45' AS p +-- !query analysis +Project [typeof(2023-12-25 10:30:45) AS type#x, 2023-12-25 10:30:45 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING CAST(NULL AS INT) AS p +-- !query analysis +Project [typeof(cast(null as int)) AS type#x, cast(null as int) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING CAST(NULL AS STRING) AS p +-- !query analysis +Project [typeof(cast(null as string)) AS type#x, cast(null as string) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, hex(:p) as val' USING X'010203FF' AS p +-- !query analysis +Project [typeof(0x010203FF) AS type#x, hex(0x010203FF) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '3' DAY AS p +-- !query analysis +Project [typeof(INTERVAL '3' DAY) AS type#x, INTERVAL '3' DAY AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '2' YEAR AS p +-- !query analysis +Project [typeof(INTERVAL '2' YEAR) AS type#x, INTERVAL '2' YEAR AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '1-2' YEAR TO MONTH AS p +-- !query analysis +Project [typeof(INTERVAL '1-2' YEAR TO MONTH) AS type#x, INTERVAL '1-2' YEAR TO MONTH AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '3 4:5:6' DAY TO SECOND AS p +-- !query analysis +Project [typeof(INTERVAL '3 04:05:06' DAY TO SECOND) AS type#x, INTERVAL '3 04:05:06' DAY TO SECOND AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 999.999BD AS p +-- !query 
analysis +Project [typeof(999.999) AS type#x, 999.999 AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p1) as type1, :p1 as val1, typeof(:p2) as type2, :p2 as val2' + USING 42 as p1, 'test string' as p2 +-- !query analysis +Project [typeof(42) AS type1#x, 42 AS val1#x, typeof(test string) AS type2#x, test string AS val2#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY(1, 2, 3) AS p +-- !query analysis +Project [typeof(array(1, 2, 3)) AS type#x, array(1, 2, 3) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY('a', 'b', 'c') AS p +-- !query analysis +Project [typeof(array(a, b, c)) AS type#x, array(a, b, c) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY(ARRAY(1, 2), ARRAY(3, 4)) AS p +-- !query analysis +Project [typeof(array(array(1, 2), array(3, 4))) AS type#x, array(array(1, 2), array(3, 4)) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP('key1', 'value1', 'key2', 'value2') AS p +-- !query analysis +Project [typeof(map(key1, value1, key2, value2)) AS type#x, map(key1, value1, key2, value2) AS val#x] ++- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP(1, 'one', 2, 'two') AS p +-- !query analysis +Project [typeof(map(1, one, 2, two)) AS type#x, map(1, one, 2, two) AS val#x] ++- OneRowRelation From 05674567ca4a7fdf6299d9654dc4d273070dfc64 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 19:39:14 -0800 Subject: [PATCH 10/37] Fix more testcases, some rework --- .../sql/catalyst/parser/ParserUtils.scala | 123 +++++++++++++++--- .../v2/V2SessionCatalogSuite.scala | 4 +- 2 files changed, 104 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index b2de673286a8..a14e77d6b558 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -48,42 +48,123 @@ object ParserUtils extends SparkParserUtils { throw QueryParsingErrors.invalidStatementError(statement, ctx) } - private val IDENTIFIER_PREFIX = "IDENTIFIER(" - /** * Gets the resolved text of a multipart identifier, handling IDENTIFIER('literal') syntax. - * This method parses each part through CatalystSqlParser to resolve identifier-lite - * expressions into their actual identifier names. + * This method properly traverses the parse tree structure to extract identifier literals, + * making it robust to comments, whitespace, and string coalescing. + * Uses the same pattern-matching approach as DataTypeAstBuilder.getIdentifierParts. * * @param ctx The multipart identifier context from the parse tree. * @return The resolved identifier text as a dot-separated string. */ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { ctx.parts.asScala.flatMap { part => - val partText = part.getText - // Check if this is an IDENTIFIER('...') literal. - if (partText.startsWith(IDENTIFIER_PREFIX) && partText.endsWith(")")) { - // Extract the literal string between the parentheses. - val literal = partText.substring(IDENTIFIER_PREFIX.length, partText.length - 1) - // Remove quotes and unescape single quotes. - val unquoted = if (literal.startsWith("'") && literal.endsWith("'")) { - literal.substring(1, literal.length - 1).replace("''", "'") + getErrorCapturingIdentifierParts(part) + }.mkString(".") + } + + /** + * Extract identifier parts from an ErrorCapturingIdentifierContext. 
+ * Mirrors the logic in DataTypeAstBuilder.getIdentifierParts but adapted for use + * in ParserUtils where we don't have access to the full AstBuilder infrastructure. + */ + private def getErrorCapturingIdentifierParts( + ctx: SqlBaseParser.ErrorCapturingIdentifierContext): Seq[String] = { + + ctx match { + case base: SqlBaseParser.ErrorCapturingIdentifierBaseContext => + // Regular identifier with errorCapturingIdentifierExtra. + val identifier = base.identifier() + if (identifier != null && identifier.strictIdentifier() != null) { + getStrictIdentifierParts(identifier.strictIdentifier()) } else { - literal + Seq(ctx.getText) } - // Parse as multipart identifier and return the parts. + case idLit: SqlBaseParser.IdentifierLiteralWithExtraContext => + // IDENTIFIER('literal') in errorCapturingIdentifier. + val literalValue = extractStringLiteralValue(idLit.stringLit()) + // Parse the literal as a multipart identifier. try { - CatalystSqlParser.parseMultipartIdentifier(unquoted) + CatalystSqlParser.parseMultipartIdentifier(literalValue) } catch { - case _: ParseException => - // If parsing fails, treat the entire text as a single identifier part. - Seq(partText) + case _: ParseException => Seq(literalValue) + } + case _ => + Seq(ctx.getText) + } + } + + /** + * Extract identifier parts from a StrictIdentifierContext. + * Mirrors DataTypeAstBuilder logic for strictIdentifier contexts. + */ + private def getStrictIdentifierParts( + ctx: SqlBaseParser.StrictIdentifierContext): Seq[String] = { + ctx match { + case idLit: SqlBaseParser.IdentifierLiteralContext => + // IDENTIFIER('literal') in strictIdentifier. + val literalValue = extractStringLiteralValue(idLit.stringLit()) + try { + CatalystSqlParser.parseMultipartIdentifier(literalValue) + } catch { + case _: ParseException => Seq(literalValue) + } + case _ => + // Regular identifier (unquoted, quoted, or keyword). + Seq(ctx.getText) + } + } + + /** + * Extract the string value from a StringLitContext. 
+ * This properly handles string coalescing ('a' 'b' -> 'ab'), escaping, and whitespace/comments. + * Mirrors the string extraction logic used in DataTypeAstBuilder. + */ + private def extractStringLiteralValue(ctx: SqlBaseParser.StringLitContext): String = { + import scala.jdk.CollectionConverters._ + + if (ctx == null) { + return "" + } + + // Extract all string literal tokens from the parse tree. + val tokens = ctx.singleStringLit().asScala.flatMap { singleStr => + val childCount = singleStr.getChildCount + if (childCount > 0) { + val child = singleStr.getChild(0) + child match { + case terminal: org.antlr.v4.runtime.tree.TerminalNode => + Some(terminal.getSymbol) + case _ => None } } else { - // Regular identifier - return as-is. - Seq(partText) + None } - }.mkString(".") + } + + if (tokens.isEmpty) { + // Fallback: extract via getText if token extraction failed. + val text = ctx.getText + if (text.startsWith("'") && text.endsWith("'")) { + return text.substring(1, text.length - 1).replace("''", "'") + } else if (text.startsWith("\"") && text.endsWith("\"")) { + return text.substring(1, text.length - 1).replace("\"\"", "\"") + } else { + return text + } + } + + // Coalesce multiple string literals and unescape. 
+ tokens.map { token => + val text = token.getText + if (text.startsWith("'") && text.endsWith("'")) { + text.substring(1, text.length - 1).replace("''", "'") + } else if (text.startsWith("\"") && text.endsWith("\"")) { + text.substring(1, text.length - 1).replace("\"\"", "\"") + } else { + text + } + }.mkString("") } def checkDuplicateClauses[T]( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index e76a8556230a..3f25a1e139fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -1163,7 +1163,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkError( exception = intercept[AnalysisException](testIdent.asTableIdentifier), condition = "IDENTIFIER_TOO_MANY_NAME_PARTS", - parameters = Map("identifier" -> "`a`.`b`.`c`") + parameters = Map("identifier" -> "`a`.`b`.`c`", "limit" -> "2") ) } @@ -1172,7 +1172,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { checkError( exception = intercept[AnalysisException](testIdent.asFunctionIdentifier), condition = "IDENTIFIER_TOO_MANY_NAME_PARTS", - parameters = Map("identifier" -> "`a`.`b`.`c`") + parameters = Map("identifier" -> "`a`.`b`.`c`", "limit" -> "2") ) } } From 582ab51f38741571e680830881df352d9c823eb9 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 20:45:53 -0800 Subject: [PATCH 11/37] Fix flaky test --- .../sql-tests/analyzer-results/identifier-clause-legacy.sql.out | 2 +- .../sql-tests/analyzer-results/identifier-clause.sql.out | 2 +- .../src/test/resources/sql-tests/inputs/identifier-clause.sql | 2 +- .../sql-tests/results/identifier-clause-legacy.sql.out | 2 +- .../test/resources/sql-tests/results/identifier-clause.sql.out 
| 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index fdd90aa39489..7e4ece419983 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -2165,7 +2165,7 @@ Sort [c1#x ASC NULLS FIRST], true -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query analysis Project [c1#x, c2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index b751244263c6..00e364d8717f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1924,7 +1924,7 @@ Sort [c1#x ASC NULLS FIRST], true -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query analysis Project [c1#x, c2#x, c4#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 64fb2e069ba8..5059c7faf407 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -332,7 +332,7 @@ EXECUTE IMMEDIATE -- Test 19: IDENTIFIER with qualified name coalescing for schema.table.column pattern -- This should work for multi-part identifiers -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias; -- Test 20: Error case - IDENTIFIER with too many parts from parameter coalescing diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index d0e8a679fa3b..e50b3b3a3840 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -2440,7 +2440,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query schema struct diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index db7345984716..9303f60120e7 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -2121,7 +2121,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE 
IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query schema struct From b9f46a30afd0f3826441205d0bc6908f6060149a Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 21:02:06 -0800 Subject: [PATCH 12/37] Delete temporray files --- COMPARISON_SUMMARY.md | 79 -- IDENTIFIER_LITE_DESIGN.md | 264 ---- IDENTIFIER_LITE_LEGACY_CONFIG.md | 190 --- IDENTIFIER_LITE_SUMMARY.md | 126 -- MATCH_RECOGNIZE_SYNTAX.md | 570 -------- MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md | 746 ---------- identifier-clause-comparison-v2.csv | 274 ---- identifier-clause-comparison-v2.md | 452 ------ identifier-clause-comparison.csv | 274 ---- identifier-clause-comparison.md | 248 ---- identifier-clause-differences.txt | 364 ----- ...xecute-immediate-legacy-identifier.sql.out | 1226 ----------------- 12 files changed, 4813 deletions(-) delete mode 100644 COMPARISON_SUMMARY.md delete mode 100644 IDENTIFIER_LITE_DESIGN.md delete mode 100644 IDENTIFIER_LITE_LEGACY_CONFIG.md delete mode 100644 IDENTIFIER_LITE_SUMMARY.md delete mode 100644 MATCH_RECOGNIZE_SYNTAX.md delete mode 100644 MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md delete mode 100644 identifier-clause-comparison-v2.csv delete mode 100644 identifier-clause-comparison-v2.md delete mode 100644 identifier-clause-comparison.csv delete mode 100644 identifier-clause-comparison.md delete mode 100644 identifier-clause-differences.txt delete mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out diff --git a/COMPARISON_SUMMARY.md b/COMPARISON_SUMMARY.md deleted file mode 100644 index bc5f87021b10..000000000000 --- a/COMPARISON_SUMMARY.md +++ /dev/null @@ -1,79 +0,0 @@ -# Identifier-Lite Implementation: Regression Check Summary - -## Files Generated - -1. 
**identifier-clause-comparison-v2.csv** - Raw CSV data with all test results -2. **identifier-clause-comparison-v2.md** - Formatted markdown table with analysis -3. **COMPARISON_SUMMARY.md** (this file) - Regression check summary - -## Regression Analysis - -### ✅ Result: NO REGRESSIONS FOUND - -Comparing the previous version (v1) with the current version (v2): -- **Total queries compared**: 227 -- **Regressions (was SUCCESS, now error)**: 0 -- **Improvements (was error, now SUCCESS)**: 0 -- **Unchanged**: 227 (100%) - -### Test Statistics - -- **Total Tests**: 227 -- **Tests from Master (baseline)**: 128 -- **New Tests Added**: 99 -- **Tests Changed from Master**: 13 (all improvements) -- **Tests with Legacy Mode Differences**: 47 (20.7%) - -## Master Comparison - -### Tests Changed from Master (13 tests - all improvements): - -1. **Query #114**: `SELECT row_number() OVER IDENTIFIER('x.win')...` - - Master: `PARSE_SYNTAX_ERROR` - - Current: `IDENTIFIER_TOO_MANY_NAME_PARTS` (better error message) - -2. **Query #115**: `SELECT T1.c1 FROM... JOIN... USING (IDENTIFIER('c1'))` - - Master: `PARSE_SYNTAX_ERROR` - - Current: `SUCCESS` ✅ - -3. **Query #117**: `SELECT map('a', 1).IDENTIFIER('a')` - - Master: `PARSE_SYNTAX_ERROR` - - Current: `SUCCESS` ✅ - -4. **Query #118**: `SELECT named_struct('a', 1).IDENTIFIER('a')` - - Master: `PARSE_SYNTAX_ERROR` - - Current: `SUCCESS` ✅ - -5. **Queries #119-123**: Window specs and dereference improvements - - Multiple queries that were failing now work or have better error messages - -6. 
**Queries #126-130**: DDL improvements (CREATE VIEW, CREATE TABLE, INSERT with column lists) - - These now work correctly with identifier-lite - -## Known Issues - -### 🐛 Unfixed Bug: `IDENTIFIER('t').c1` - -**Query**: `SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1)` -**Status**: Still fails with `UNRESOLVED_COLUMN.WITH_SUGGESTION` -**Expected**: Should resolve as table-qualified column reference and return `1` - -**Root Cause**: -- `IDENTIFIER_KW` is in the `nonReserved` keyword list -- Parser matches `IDENTIFIER` as a function name (via `qualifiedName` → `nonReserved`) -- Then treats `('t')` as function arguments -- Result: creates wrong AST structure - -**Investigation**: -- Attempted grammar reordering: broke other tests -- Attempted adding predicates to `functionName`: didn't prevent matching via `qualifiedName` -- Needs AST-level fix or removal of `IDENTIFIER_KW` from `nonReserved` (may have side effects) - -## Conclusion - -✅ **Safe to proceed**: No regressions introduced -✅ **Improvements made**: 13 tests that were broken now work or have better errors -✅ **New functionality**: 99 new tests covering identifier-lite features -⚠️ **One known bug**: `IDENTIFIER('t').c1` case - documented but unfixed - -The implementation is stable and provides significant improvements over master, with one edge case remaining to be fixed in future work. diff --git a/IDENTIFIER_LITE_DESIGN.md b/IDENTIFIER_LITE_DESIGN.md deleted file mode 100644 index b98cc22fe44a..000000000000 --- a/IDENTIFIER_LITE_DESIGN.md +++ /dev/null @@ -1,264 +0,0 @@ -# Identifier-Lite Feature Design - -## Overview - -The **identifier-lite** feature is a simplified version of the existing `IDENTIFIER` clause in Spark SQL. It allows `IDENTIFIER('string_literal')` to be used anywhere identifiers can appear in SQL statements, with the string literal being folded immediately during the parse phase. 
- -## Motivation - -The existing `IDENTIFIER` clause in Spark is limited to a narrow set of use cases: -- It only works in specific grammar positions (table references, column references, function names) -- It requires analysis-time resolution via `PlanWithUnresolvedIdentifier` and `ExpressionWithUnresolvedIdentifier` -- It supports full expressions (including parameter markers and concatenation) - -The identifier-lite feature generalizes identifier templating to **all places where identifiers can be used**, while simplifying the implementation by: -- Only accepting string literals (not arbitrary expressions) -- Folding the string literal into an identifier at parse time (not analysis time) -- Working seamlessly with all existing grammar rules that use identifiers - -## Design - -### Grammar Changes - -#### SqlBaseParser.g4 - -Added a new alternative to the `strictIdentifier` grammar rule: - -```antlr -strictIdentifier - : IDENTIFIER #unquotedIdentifier - | quotedIdentifier #quotedIdentifierAlternative - | IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral - | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier - | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier - ; -``` - -This allows `IDENTIFIER('string')` to appear anywhere a regular identifier can appear, including: -- Table names -- Column names -- Schema/database names -- Function names -- Constraint names -- And any other identifier context - -### Qualified Identifier Support - -The identifier-lite feature supports **qualified identifiers** within the string literal. 
When you write: -- `IDENTIFIER('`catalog`.`schema`')` - this is parsed into multiple parts: `['catalog', 'schema']` -- `IDENTIFIER('schema.table')` - parsed into: `['schema', 'table']` -- `IDENTIFIER('schema').table` - the schema part is parsed, then combined with the literal `table` - -This allows flexible composition of identifiers: -```sql --- These are all equivalent for table 'catalog.schema.table': -IDENTIFIER('catalog.schema.table') -IDENTIFIER('catalog.schema').table -IDENTIFIER('catalog').schema.table -catalog.IDENTIFIER('schema.table') -catalog.IDENTIFIER('schema').table -``` - -### Parser Implementation Changes - -#### DataTypeAstBuilder.scala - -Added helper methods to handle identifier-lite with qualified identifier support: - -```scala -protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { - ctx match { - case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal'), extract the string literal value and parse it - val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string as a multi-part identifier (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) - CatalystSqlParser.parseMultipartIdentifier(literalValue) - case _ => - // For regular identifiers, just return the text as a single part - Seq(ctx.getText) - } -} - -protected def getIdentifierText(ctx: ParserRuleContext): String = { - getIdentifierParts(ctx).mkString(".") -} -``` - -Updated `visitMultipartIdentifier()` to flatten parts when an identifier-lite contains multiple parts: - -```scala -override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = - ctx.parts.asScala.flatMap { part => - val identifierCtx = part.identifier() - if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { - // Returns Seq with 1+ elements (multiple if qualified) - getIdentifierParts(identifierCtx.strictIdentifier()) - } else { - Seq(part.getText) - } - }.toSeq -``` - -#### AstBuilder.scala - -Updated all methods that 
extract identifier text to use `getIdentifierParts()`: -- `visitIdentifierSeq()` - uses `getIdentifierText()` to keep list items as single strings -- `visitTableIdentifier()` - combines db and table parts from qualified identifiers -- `visitFunctionIdentifier()` - combines db and function parts from qualified identifiers -- `visitColDefinition()` - extracts column names -- Column name extraction in various contexts - -Special handling for `TableIdentifier` and `FunctionIdentifier` to properly combine parts: -```scala -override def visitTableIdentifier(ctx: TableIdentifierContext): TableIdentifier = { - val tableParts = getIdentifierParts(ctx.table.strictIdentifier()) - val dbParts = Option(ctx.db).map(db => getIdentifierParts(db.strictIdentifier())) - val allParts = dbParts.getOrElse(Seq.empty) ++ tableParts - - allParts match { - case Seq(table) => TableIdentifier(table, None) - case parts if parts.size >= 2 => - TableIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) - } -} -``` - -## Key Differences from Full IDENTIFIER Clause - -| Feature | Full IDENTIFIER Clause | Identifier-Lite | -|---------|----------------------|-----------------| -| **Syntax** | `IDENTIFIER(expression)` | `IDENTIFIER('literal')` | -| **Supported Arguments** | Any constant string expression (including parameter markers, variables, concatenation) | Only string literals | -| **Resolution Time** | Analysis phase (via `PlanWithUnresolvedIdentifier`) | Parse phase (immediately folded) | -| **Grammar Positions** | Limited to specific rules (`identifierReference`, `functionName`) | All positions where identifiers are used | -| **Use Case** | Dynamic identifier resolution with runtime values | Static identifier specification with unusual names | - -## Usage Examples - -### Table Names - -```sql --- Create table with identifier-lite -CREATE TABLE IDENTIFIER('my_table') (c1 INT); - --- Query table -SELECT * FROM IDENTIFIER('my_table'); - --- Qualified table name (fully specified) -SELECT 
* FROM IDENTIFIER('schema.table'); - --- Qualified table name (partial specification) -SELECT * FROM IDENTIFIER('schema').table; - --- Qualified with backticks -SELECT * FROM IDENTIFIER('`my schema`.`my table`'); -``` - -### Column Names - -```sql --- Select specific columns -SELECT IDENTIFIER('col1'), IDENTIFIER('col2') FROM t; - --- Column with special characters -SELECT IDENTIFIER('`column with spaces`') FROM t; - --- Mixed usage -CREATE TABLE t(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING); - --- Qualified column references -SELECT IDENTIFIER('t.col1') FROM t; -``` - -### Function Names - -```sql --- Use identifier-lite for function names -SELECT IDENTIFIER('abs')(-5); -SELECT IDENTIFIER('upper')('hello'); - --- Qualified function names -SELECT IDENTIFIER('schema.my_udf')(value) FROM t; -``` - -### DDL Operations - -```sql --- ALTER TABLE operations -ALTER TABLE IDENTIFIER('table_name') ADD COLUMN IDENTIFIER('new_col') INT; -ALTER TABLE IDENTIFIER('table_name') RENAME COLUMN IDENTIFIER('old') TO IDENTIFIER('new'); - --- DROP operations with qualified names -DROP TABLE IDENTIFIER('schema.table_name'); - --- Mixed qualification -DROP TABLE IDENTIFIER('schema').table_name; -``` - -### Complex Qualified Identifier Examples - -```sql --- Three-part identifier (catalog.schema.table) -SELECT * FROM IDENTIFIER('catalog.schema.table'); - --- Equivalent forms: -SELECT * FROM IDENTIFIER('catalog.schema').table; -SELECT * FROM IDENTIFIER('catalog').schema.table; -SELECT * FROM catalog.IDENTIFIER('schema.table'); -SELECT * FROM catalog.IDENTIFIER('schema').table; - --- With backticked parts -SELECT * FROM IDENTIFIER('`my catalog`.`my schema`.`my table`'); - --- Mixed backticks and regular identifiers -SELECT * FROM IDENTIFIER('`my catalog`.`my schema`').regular_table; -``` - -## Implementation Files Modified - -1. 
**Grammar Files**: - - `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4` - - `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4` (no changes - reusing existing `IDENTIFIER_KW`) - -2. **Parser Implementation**: - - `sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala` - - `sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala` - -3. **Test Files**: - - `sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql` (merged identifier-lite tests) - - `sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala` (added `IdentifierLiteSuite`) - -## Limitations - -1. **No Expression Support**: Identifier-lite only accepts string literals. Expressions like `IDENTIFIER('tab' || '_name')` or parameter markers like `IDENTIFIER(:param)` are not supported. For these use cases, the full IDENTIFIER clause should be used instead. - -2. **No Runtime Binding**: Since the identifier is folded at parse time, it cannot be changed dynamically. For dynamic identifier binding, use the full IDENTIFIER clause with parameter markers or variables. - -3. **String Literal Only**: The argument must be a string literal (`'value'` or `"value"`). Variables, parameter markers, and expressions are not supported. - -## Testing - -Test coverage includes: -1. Basic usage with table names, column names, and function names -2. Qualified identifiers (e.g., `schema.table`, `catalog.schema.table`) -3. Identifiers with special characters (backticked identifiers) -4. DDL operations (CREATE, ALTER, DROP) -5. Mixed usage with regular identifiers -6. Column definitions using identifier-lite -7. ALTER TABLE operations with identifier-lite (RENAME COLUMN, ADD COLUMN, DROP COLUMN, RENAME TABLE) -8. 
**Qualified table references with identifier-lite:** - - `IDENTIFIER('schema.table')` - fully qualified in one literal - - `IDENTIFIER('schema').table` - partial qualification - - ``IDENTIFIER('`schema`.`table`')`` - with backticks - - Mixed forms with both identifier-lite and regular identifiers - -All identifier-lite tests have been integrated into the existing `identifier-clause.sql` test suite under a dedicated section, making it easy to see the distinction between: -- Full IDENTIFIER clause tests (using expressions, concatenation, variables) -- Identifier-lite tests (using only string literals) - -## Future Enhancements - -Potential future improvements: -1. Better error messages when users try to use expressions instead of literals -2. Support for identifier-lite in additional contexts (e.g., constraint names, index names) -3. Documentation updates in the SQL reference guide diff --git a/IDENTIFIER_LITE_LEGACY_CONFIG.md b/IDENTIFIER_LITE_LEGACY_CONFIG.md deleted file mode 100644 index a8c817451839..000000000000 --- a/IDENTIFIER_LITE_LEGACY_CONFIG.md +++ /dev/null @@ -1,190 +0,0 @@ -# Legacy Configuration for IDENTIFIER Clause - -## Overview - -The identifier-lite feature introduces `IDENTIFIER('literal')` syntax that resolves string literals to identifiers at parse time. To maintain backward compatibility with the legacy `IDENTIFIER(expression)` behavior, a configuration option is provided. 
- -## Configuration - -### `spark.sql.legacy.identifierClause` - -- **Type**: Boolean -- **Default**: `false` (identifier-lite enabled) -- **Internal**: Yes -- **Since**: 4.1.0 - -### Behavior - -#### Default Behavior (`false`) -When `spark.sql.legacy.identifierClause = false` (default): -- **NEW**: `IDENTIFIER('literal')` is resolved at parse time to the identifier `literal` -- **LEGACY**: `IDENTIFIER(expression)` still works for dynamic table/schema references - -Examples: -```sql --- Identifier-lite: Resolved at parse time -SELECT IDENTIFIER('col1') FROM t; -- Same as: SELECT col1 FROM t - --- Parameter markers work with identifier-lite -SELECT IDENTIFIER(:param) FROM t; -- If :param = 'col1', same as SELECT col1 FROM t - --- String coalescing works with identifier-lite -SELECT IDENTIFIER('col' '1') FROM t; -- Same as: SELECT col1 FROM t - --- Legacy IDENTIFIER clause still works -DECLARE table_name = 'my_table'; -SELECT * FROM IDENTIFIER(table_name); -- Evaluated at analysis time -``` - -#### Legacy-Only Behavior (`true`) -When `spark.sql.legacy.identifierClause = true`: -- **DISABLED**: `IDENTIFIER('literal')` is NOT allowed -- **LEGACY ONLY**: Only `IDENTIFIER(expression)` is allowed - -Examples: -```sql -SET spark.sql.legacy.identifierClause = true; - --- This will FAIL with parse error -SELECT IDENTIFIER('col1') FROM t; - --- Only the legacy dynamic form works -DECLARE table_name = 'my_table'; -SELECT * FROM IDENTIFIER(table_name); -- Works -``` - -## Implementation Details - -### Grammar Rule Guards - -The identifier-lite alternatives are guarded by `{!legacy_identifier_clause_only}?` predicates: - -```antlr -strictIdentifier - : IDENTIFIER #unquotedIdentifier - | quotedIdentifier #quotedIdentifierAlternative - | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral - | ... 
- ; - -errorCapturingIdentifier - : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase - | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra - ; -``` - -### Parser Precedence - -The `identifierReference` rule is ordered to prioritize the legacy syntax: - -```antlr -identifierReference - : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN // Legacy: try first - | multipartIdentifier // Identifier-lite: try second - ; -``` - -This ensures that when identifier-lite is enabled, the parser: -1. First tries to match the legacy `IDENTIFIER(expression)` syntax -2. Only if that fails (e.g., because it's a string literal), falls back to matching identifier-lite through `multipartIdentifier` - -### Configuration Flow - -1. **SQLConf.scala**: Defines `LEGACY_IDENTIFIER_CLAUSE_ONLY` config -2. **SqlApiConf.scala**: Trait method `def legacyIdentifierClauseOnly: Boolean` -3. **SQLConf.scala**: Implementation `getConf(LEGACY_IDENTIFIER_CLAUSE_ONLY)` -4. **parsers.scala**: Sets parser boolean: `parser.legacy_identifier_clause_only = conf.legacyIdentifierClauseOnly` -5. **SqlBaseParser.g4**: Grammar predicates check `{!legacy_identifier_clause_only}?` - -## Use Cases - -### When to Use Legacy Mode (`true`) - -1. **Backward Compatibility**: Existing applications that rely exclusively on the legacy `IDENTIFIER(expression)` behavior -2. **Migration Period**: Temporarily disable identifier-lite while migrating code -3. 
**Testing**: Verify that code doesn't accidentally use identifier-lite syntax - -### Recommended Settings - -- **New Applications**: Keep default (`false`) to use identifier-lite -- **Existing Applications**: Test with default (`false`); use legacy mode (`true`) only if needed -- **Production**: Use default (`false`) for maximum flexibility - -## Examples - -### Complete Example: Both Modes - -```sql --- Default mode (identifier-lite enabled) -SET spark.sql.legacy.identifierClause = false; - -CREATE TABLE my_table(col1 INT, col2 STRING); - --- Identifier-lite works -SELECT IDENTIFIER('col1') FROM my_table; -- Returns col1 values - --- Legacy still works -DECLARE tab_name = 'my_table'; -SELECT * FROM IDENTIFIER(tab_name); -- Returns all rows - --- With parameters -SELECT IDENTIFIER(:col) FROM IDENTIFIER(:tab) USING 'col1' AS col, 'my_table' AS tab; - ---- - --- Legacy mode (identifier-lite disabled) -SET spark.sql.legacy.identifierClause = true; - --- Identifier-lite FAILS -SELECT IDENTIFIER('col1') FROM my_table; -- PARSE ERROR - --- Legacy still works -DECLARE tab_name = 'my_table'; -SELECT * FROM IDENTIFIER(tab_name); -- Returns all rows -``` - -## Testing - -The legacy behavior is tested in: -- `SQLViewTestSuite` - Test "SPARK-51552: Temporary variables under identifiers are not allowed in persisted view" - - Verifies that legacy `IDENTIFIER(variable)` correctly evaluates at analysis time - - Ensures proper error messages when temporary objects are referenced in persisted views - -## Related Configurations - -- `spark.sql.legacy.parameterSubstitution.constantsOnly`: Controls where parameter markers are allowed -- `spark.sql.legacy.setopsPrecedence.enabled`: Controls set operation precedence -- Both follow the same pattern of using grammar predicates for conditional syntax - -## Migration Guide - -### From Legacy to Identifier-Lite - -1. **Audit Code**: Find all uses of `IDENTIFIER(expression)` where `expression` is a variable -2. 
**Replace with String Literals**: - ```sql - -- Before (legacy) - DECLARE col_name = 'my_col'; - SELECT IDENTIFIER(col_name) FROM t; - - -- After (identifier-lite with parameters) - SELECT IDENTIFIER(:col) FROM t USING 'my_col' AS col; - ``` -3. **Test**: Verify all queries work with default config -4. **Deploy**: Use default `spark.sql.legacy.identifierClause = false` - -### If You Must Stay on Legacy - -Set the configuration globally or per-session: -```sql --- Spark SQL -SET spark.sql.legacy.identifierClause = true; - --- Spark properties file -spark.sql.legacy.identifierClause=true - --- SparkSession builder -spark.conf.set("spark.sql.legacy.identifierClause", "true") -``` - - diff --git a/IDENTIFIER_LITE_SUMMARY.md b/IDENTIFIER_LITE_SUMMARY.md deleted file mode 100644 index e3c26f39d2bb..000000000000 --- a/IDENTIFIER_LITE_SUMMARY.md +++ /dev/null @@ -1,126 +0,0 @@ -# Identifier-Lite Implementation Summary - -## Completed Tasks - -✅ **Grammar Changes** -- Modified `SqlBaseParser.g4` to add `IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN #identifierLiteral` as a new alternative in `strictIdentifier` -- No changes needed to `SqlBaseLexer.g4` - reused existing `IDENTIFIER_KW` token - -✅ **Parser Implementation** -- Added `getIdentifierText()` helper method in `DataTypeAstBuilder.scala` to extract identifier text from both regular identifiers and identifier-lite syntax -- Updated `visitMultipartIdentifier()` in `DataTypeAstBuilder.scala` to handle identifier-lite -- Updated `AstBuilder.scala` methods: - - `visitIdentifierSeq()` - - `visitTableIdentifier()` - - `visitFunctionIdentifier()` - - `visitColDefinition()` - - Column name extraction in `visitHiveChangeColumn()` - - Column name extraction in `visitColType()` (DataTypeAstBuilder) - -✅ **Test Coverage** -- Merged identifier-lite tests into existing `identifier-clause.sql` test suite -- Added dedicated section for identifier-lite tests with clear comments distinguishing them from full IDENTIFIER clause tests -- 
Created `IdentifierLiteSuite` class in `ParametersSuite.scala` with unit tests -- Test coverage includes: - - Column definitions with identifier-lite - - ALTER TABLE operations (RENAME COLUMN, ADD COLUMN, DROP COLUMN, RENAME TABLE) - - Qualified table references - - Function names - - Mixed usage scenarios - -✅ **Documentation** -- Created `IDENTIFIER_LITE_DESIGN.md` with comprehensive design documentation - -## Key Features - -### What Works Now - -The identifier-lite feature allows `IDENTIFIER('string_literal')` to be used in **all** positions where identifiers can appear: - -1. **Table Names** - ```sql - CREATE TABLE IDENTIFIER('my_table') (c1 INT); - SELECT * FROM IDENTIFIER('schema.table'); - ``` - -2. **Column Names** (including in column definitions) - ```sql - CREATE TABLE t(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING); - SELECT IDENTIFIER('col1') FROM t; - ``` - -3. **Function Names** - ```sql - SELECT IDENTIFIER('abs')(-5); - ``` - -4. **Schema Names** - ```sql - CREATE SCHEMA IDENTIFIER('my_schema'); - USE IDENTIFIER('my_schema'); - ``` - -5. **ALTER TABLE Operations** - ```sql - ALTER TABLE IDENTIFIER('t') RENAME COLUMN IDENTIFIER('old') TO IDENTIFIER('new'); - ALTER TABLE IDENTIFIER('t') ADD COLUMN IDENTIFIER('col') INT; - ``` - -6. **Qualified Identifiers** - ```sql - SELECT * FROM IDENTIFIER('schema.table.column'); - ``` - -### Key Differences from Full IDENTIFIER Clause - -| Aspect | Full IDENTIFIER | Identifier-Lite | -|--------|----------------|-----------------| -| **Syntax** | `IDENTIFIER(expr)` | `IDENTIFIER('literal')` | -| **Arguments** | Any constant expression | String literals only | -| **Resolution** | Analysis phase | Parse phase (immediate) | -| **Grammar Scope** | Limited positions | All identifier positions | - -## Files Modified - -1. `sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4` -2. `sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala` -3. 
`sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala` -4. `sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql` -5. `sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala` - -## Next Steps - -To complete the implementation: - -1. **Build & Test**: Run the full test suite to ensure all tests pass - ```bash - build/mvn clean test -pl sql/catalyst,sql/api,sql/core - ``` - -2. **Generate Parser**: ANTLR needs to regenerate parser classes from the grammar - ```bash - build/mvn clean compile -pl sql/api - ``` - -3. **Run Specific Tests**: - ```bash - build/mvn test -pl sql/core -Dtest=IdentifierLiteSuite - build/sbt "sql/testOnly *identifier-clause*" - ``` - -4. **Update Documentation**: Consider adding user-facing documentation to `docs/sql-ref-identifier-clause.md` - -## Design Decisions - -1. **Reused IDENTIFIER keyword**: No new keyword needed; distinction is based on argument type (literal vs expression) and resolution time -2. **Parse-time folding**: String literals are resolved immediately during parsing for simplicity and universal applicability -3. **Universal applicability**: Works in all identifier positions without special grammar rules -4. **Clean separation**: Tests clearly distinguish identifier-lite (literals) from full IDENTIFIER (expressions) - -## Benefits - -1. **Simplicity**: Parse-time folding is simpler than analysis-time resolution -2. **Universality**: Works everywhere identifiers are used, no special cases -3. **Backward Compatible**: Existing IDENTIFIER clause with expressions continues to work -4. 
**Clear Semantics**: String literal-only restriction makes behavior predictable - diff --git a/MATCH_RECOGNIZE_SYNTAX.md b/MATCH_RECOGNIZE_SYNTAX.md deleted file mode 100644 index a16dee1907e4..000000000000 --- a/MATCH_RECOGNIZE_SYNTAX.md +++ /dev/null @@ -1,570 +0,0 @@ -# MATCH_RECOGNIZE Clause - Syntax Description - -## Overview - -The `MATCH_RECOGNIZE` clause is a powerful SQL feature for performing pattern recognition and sequence analysis over ordered sets of rows. It enables detection of patterns in time-series data, event sequences, and other ordered datasets using a regular expression-like syntax. - -## General Syntax - -```sql -MATCH_RECOGNIZE ( - [ PARTITION BY partition_expression [, ...] ] - ORDER BY order_expression [ ASC | DESC ] [, ...] - MEASURES measure_expression AS alias [, ...] - [ ONE ROW PER MATCH | ALL ROWS PER MATCH ] - [ AFTER MATCH skip_clause ] - PATTERN ( pattern_expression ) - [ SUBSET subset_definition [, ...] ] - DEFINE pattern_variable AS condition [, ...] -) -``` - -## Clause Components - -### 1. PARTITION BY (Optional) - -**Purpose**: Divides the input data into independent partitions for parallel pattern matching. - -**Syntax**: -```sql -PARTITION BY column_expression [, column_expression ...] -``` - -**Examples from corpus**: -```sql -PARTITION BY match_0_0 -PARTITION BY accountRegion -PARTITION BY field_name -``` - -**Notes**: -- Pattern matching is performed independently within each partition -- Similar to window function partitioning -- Can be omitted for global pattern matching across all rows -- Supports single or multiple partitioning columns - -### 2. ORDER BY (Required) - -**Purpose**: Specifies the order of rows within each partition for pattern evaluation. - -**Syntax**: -```sql -ORDER BY column_expression [ ASC | DESC ] [, ...] 
-``` - -**Examples from corpus**: -```sql -ORDER BY p_event_time ASC -``` - -**Notes**: -- **REQUIRED** - Pattern matching depends on row ordering -- Typically orders by timestamp for temporal pattern detection -- Supports ASC (ascending) or DESC (descending) ordering -- Can specify multiple ordering columns - -### 3. MEASURES - -**Purpose**: Defines computed values to be returned for each pattern match. - -**Syntax**: -```sql -MEASURES - expression AS alias [, - expression AS alias ...] -``` - -**Common Functions Used in MEASURES**: - -| Function | Description | Example | -|----------|-------------|---------| -| `MATCH_NUMBER()` | Returns a unique identifier for each match | `MATCH_NUMBER() AS match_number` | -| `FIRST(column)` | Returns value from first row of the match | `FIRST(p_event_time) AS start_time` | -| `LAST(column)` | Returns value from last row of the match | `LAST(p_event_time) AS end_time` | -| `COUNT(pattern.*)` | Counts rows matching a specific pattern variable | `COUNT(pattern_Login.*) AS num_logins` | - -**Examples from corpus**: -```sql -MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, - COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances -``` - -**Notes**: -- Can reference pattern variables using dot notation (e.g., `pattern_variable.*`) -- Supports aggregate functions and row-level functions -- Column references without qualifiers refer to the entire match - -### 4. Output Mode - -**Purpose**: Determines how many rows are returned per match. 
- -**Options**: - -#### ONE ROW PER MATCH -- Returns a single summary row for each pattern match -- Contains only MEASURES values -- **Default behavior** (if not specified) - -#### ALL ROWS PER MATCH -- Returns all rows that participated in the match -- Each row includes the MEASURES values -- Useful for detailed analysis of matched sequences - -**Examples from corpus**: -```sql -ALL ROWS PER MATCH -``` - -**Note**: All examples in the corpus use `ALL ROWS PER MATCH`. - -### 5. AFTER MATCH (Optional) - -**Purpose**: Specifies where to resume pattern matching after a match is found. - -**Syntax**: -```sql -AFTER MATCH skip_strategy -``` - -**Skip Strategies**: - -| Strategy | Description | When to Use | -|----------|-------------|-------------| -| `SKIP PAST LAST ROW` | Resume after the last row of the current match | Non-overlapping matches (most common) | -| `SKIP TO NEXT ROW` | Resume from the row after the first row of the match | Overlapping matches allowed | -| `SKIP TO FIRST pattern_variable` | Resume at the first row of the specified pattern variable | Complex overlapping scenarios | -| `SKIP TO LAST pattern_variable` | Resume at the last row of the specified pattern variable | Complex overlapping scenarios | - -**Examples from corpus**: -```sql -AFTER MATCH SKIP PAST LAST ROW -``` - -**Note**: All examples in the corpus use `SKIP PAST LAST ROW`, which is the most common strategy for detecting distinct, non-overlapping sequences. - -### 6. PATTERN (Required) - -**Purpose**: Defines the sequence pattern to match using regular expression-like syntax. 
- -**Syntax**: -```sql -PATTERN ( pattern_expression ) -``` - -**Pattern Quantifiers**: - -| Quantifier | Description | Example | -|------------|-------------|---------| -| `{n}` | Exactly n occurrences | `A{3}` - exactly 3 A's | -| `{n,}` | At least n occurrences | `A{1,}` - one or more A's | -| `{n,m}` | Between n and m occurrences | `A{2,5}` - 2 to 5 A's | -| `{0,0}` | Zero occurrences (used with PERMUTE for "absence" detection) | `A{0,0}` - no A's | -| `+` | One or more (equivalent to `{1,}`) | `A+` - one or more A's | -| `*` | Zero or more | `A*` - zero or more A's | -| `?` | Zero or one | `A?` - optional A | - -**Pattern Operators**: - -| Operator | Description | Example | -|----------|-------------|---------| -| Space (concatenation) | Sequential pattern | `A B C` - A followed by B followed by C | -| `\|` (alternation) | Either pattern | `A \| B` - either A or B | -| `()` (grouping) | Groups sub-patterns | `(A B)+` - one or more A-B sequences | - -**Special Pattern Functions**: - -#### PERMUTE -**Purpose**: Matches pattern variables in any order (not necessarily sequential). - -**Syntax**: -```sql -PATTERN ( PERMUTE(pattern_var1{n1,m1}, pattern_var2{n2,m2}, ...) 
) -``` - -**Examples from corpus**: -```sql --- Any order of 4 different patterns -PATTERN (PERMUTE( - pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled{1,}, - pattern_AWS_CloudTrail_SES_CheckSendQuota{1,}, - pattern_AWS_CloudTrail_SES_ListIdentities{1,}, - pattern_AWS_CloudTrail_SES_CheckIdentityVerifications{1,} -)) - --- Detect presence of A but absence of B (within time window) -PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) -``` - -**Standard Sequential Patterns**: - -```sql --- Simple sequence: A followed by B -PATTERN (pattern_A{1,} pattern_B{1,}) - --- Complex sequence: A followed by B followed by C -PATTERN (pattern_A{1,} pattern_B{1,} pattern_C{1,}) - --- Three-step sequence with minimum occurrences -PATTERN (pattern_TempStageCreated{1,} pattern_CopyIntoStage{1,} pattern_FileDownloaded{1,}) - --- Sequence with specific count requirement -PATTERN (pattern_BruteForce{5,} pattern_LoginSuccess{1,}) -``` - -### 7. SUBSET (Optional) - -**Purpose**: Creates a union of multiple pattern variables under a single alias. - -**Syntax**: -```sql -SUBSET subset_name = (pattern_var1, pattern_var2, ...) -``` - -**Use Cases**: -- Grouping related pattern variables for aggregate functions -- Simplifying DEFINE conditions that apply to multiple variables -- Creating logical groups of events - -**Note**: Not commonly used in the corpus examples, but supported in the standard. - -### 8. DEFINE (Required) - -**Purpose**: Specifies the conditions that rows must satisfy to be classified as each pattern variable. - -**Syntax**: -```sql -DEFINE - pattern_variable AS condition [, - pattern_variable AS condition ...] -``` - -**Common Condition Patterns**: - -#### Simple Equality Conditions -```sql -pattern_AWS_Console_Login AS p_rule_id = 'AWS.Console.Login' -pattern_Okta_Login AS p_rule_id = 'Okta.Login.Success' -``` - -#### Time-Based Constraints with LAG - -**Purpose**: Ensure events occur within a specified time window. 
- -```sql --- Pattern must occur within N minutes of previous event -pattern_Variable AS p_rule_id = 'Rule.ID' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) -``` - -**Common time windows from corpus**: -- 15 minutes: Quick succession events -- 30 minutes: Related security events -- 60 minutes: Related workflow events -- 90 minutes: Extended workflow patterns -- 120 minutes: Long-running processes -- 720 minutes (12 hours): Extended persistence patterns - -#### Negative Time Constraints with LAG - -**Purpose**: Ensure a preceding event did NOT occur within a time window. - -```sql --- Match if previous event was different OR happened too long ago -pattern_Variable AS p_rule_id = 'Current.Rule' - AND (LAG(p_rule_id, 1, '') != 'Previous.Rule' - OR (LAG(p_rule_id, 1, '') = 'Previous.Rule' - AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 15)) -``` - -#### Negative Time Constraints with LEAD - -**Purpose**: Ensure a following event did NOT occur within a time window. 
- -```sql --- Match if next event is different OR happens too far in future -pattern_Variable AS p_rule_id = 'Current.Rule' - AND (LEAD(p_rule_id, 1, '') != 'Next.Rule' - OR (LEAD(p_rule_id, 1, '') = 'Next.Rule' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) -``` - -**Navigation Functions in DEFINE**: - -| Function | Description | Example Use Case | -|----------|-------------|------------------| -| `LAG(column, offset, default)` | Access preceding row value | Time gap from previous event | -| `LEAD(column, offset, default)` | Access following row value | Time gap to next event | -| `PREV(column)` | Previous row (shorthand for LAG) | Price comparison | -| `FIRST(column)` | First row in match so far | Compare to starting value | -| `LAST(column)` | Last row in match so far | Compare to ending value | - -**Complex Condition Example**: -```sql -DEFINE - pattern_AWS_EC2_Startup_Script_Change AS - p_rule_id = 'AWS.EC2.Startup.Script.Change' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90), - pattern_AWS_EC2_StopInstances AS - p_rule_id = 'AWS.EC2.StopInstances' -``` - -## Common Pattern Examples - -### 1. Sequential Event Detection - -**Use Case**: Detect A followed by B within a time window. - -```sql -MATCH_RECOGNIZE ( - PARTITION BY user_id - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(event_time) AS start_time, - LAST(event_time) AS end_time - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_A{1,} pattern_B{1,}) - DEFINE - pattern_A AS event_type = 'TypeA', - pattern_B AS event_type = 'TypeB' - AND (LAG(event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 60) -) -``` - -### 2. Three-Step Sequential Pattern - -**Use Case**: Detect multi-stage attack or workflow (A → B → C). 
- -```sql -MATCH_RECOGNIZE ( - PARTITION BY entity_id - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(event_time) AS start_time, - LAST(event_time) AS end_time, - COUNT(pattern_A.*) AS num_a, - COUNT(pattern_B.*) AS num_b, - COUNT(pattern_C.*) AS num_c - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_A{1,} pattern_B{1,} pattern_C{1,}) - DEFINE - pattern_A AS event_type = 'TypeA', - pattern_B AS event_type = 'TypeB' - AND (LAG(event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 15), - pattern_C AS event_type = 'TypeC' - AND (LAG(event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 15) -) -``` - -### 3. Absence Detection (Negative Pattern) - -**Use Case**: Detect event A without subsequent event B within time window. - -```sql -MATCH_RECOGNIZE ( - PARTITION BY entity_id - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - COUNT(pattern_A.*) AS num_a - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_A{1,}) - DEFINE - pattern_A AS event_type = 'TypeA' - AND (LEAD(event_type, 1, '') != 'TypeB' - OR (LEAD(event_type, 1, '') = 'TypeB' - AND ABS(DATEDIFF(MINS, LEAD(event_time), event_time)) > 60)) -) -``` - -### 4. Unordered Pattern Matching (PERMUTE) - -**Use Case**: Detect all of multiple events in any order. - -```sql -MATCH_RECOGNIZE ( - PARTITION BY account_region - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - COUNT(pattern_A.*) AS num_a, - COUNT(pattern_B.*) AS num_b, - COUNT(pattern_C.*) AS num_c - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (PERMUTE(pattern_A{1,}, pattern_B{1,}, pattern_C{1,})) - DEFINE - pattern_A AS event_type = 'TypeA', - pattern_B AS event_type = 'TypeB', - pattern_C AS event_type = 'TypeC' -) -``` - -### 5. Threshold-Based Pattern - -**Use Case**: Detect N failures followed by success (e.g., brute force). 
- -```sql -MATCH_RECOGNIZE ( - PARTITION BY ip_address - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(event_time) AS start_time, - LAST(event_time) AS end_time, - COUNT(pattern_Failure.*) AS num_failures, - COUNT(pattern_Success.*) AS num_successes - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_Failure{5,} pattern_Success{1,}) - DEFINE - pattern_Failure AS event_type = 'LoginFailure', - pattern_Success AS event_type = 'LoginSuccess' - AND (LAG(event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(event_time), event_time)) <= 30) -) -``` - -### 6. Global Pattern (No Partitioning) - -**Use Case**: Match patterns across entire dataset. - -```sql -MATCH_RECOGNIZE ( - ORDER BY event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(event_time) AS start_time, - LAST(event_time) AS end_time - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_A{1,}) - DEFINE - pattern_A AS event_type = 'TargetEvent' - AND (LEAD(event_type, 1, '') != 'FollowUp' - OR (LEAD(event_type, 1, '') = 'FollowUp' - AND ABS(DATEDIFF(MINS, LEAD(event_time), event_time)) > 60)) -) -``` - -## Key Design Patterns from Corpus - -### 1. Time Window Constraints - -Most security and event correlation use cases require events to occur within specific time windows: - -```sql --- Within N minutes of previous event -AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) -``` - -### 2. Negative Pattern Detection - -Detecting what DIDN'T happen is crucial for security anomaly detection: - -```sql --- Event A without event B within time window -AND (LEAD(p_rule_id, 1, '') != 'Expected.Event' - OR (LEAD(p_rule_id, 1, '') = 'Expected.Event' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > threshold)) -``` - -### 3. 
Comprehensive Measures - -Security and audit applications typically capture: -- Match identifier: `MATCH_NUMBER()` -- Temporal bounds: `FIRST(p_event_time)`, `LAST(p_event_time)` -- Event counts: `COUNT(pattern_variable.*)` for each pattern variable - -### 4. Consistent Naming Convention - -Pattern variables follow clear naming: `pattern_<RuleName>` -- Example: `pattern_AWS_IAM_CreateUser`, `pattern_Okta_Login_Success` - -## Implementation Notes - -### Typical Use Cases - -1. **Security Event Correlation**: Detect multi-stage attacks, privilege escalation, account compromise -2. **Fraud Detection**: Identify suspicious transaction sequences -3. **Workflow Monitoring**: Track multi-step processes and detect anomalies -4. **SLA Monitoring**: Detect missing or delayed steps in expected sequences -5. **Behavioral Analytics**: Identify unusual patterns in user behavior - -### Performance Considerations - -1. **Partitioning**: Proper partitioning is critical for performance and correctness - - Partition by entity (user, account, IP address, etc.) - - Ensures pattern matching within related event streams - -2. **Ordering**: Always order by timestamp for temporal patterns - - Use ASC for forward-looking patterns - - Critical for LAG/LEAD correctness - -3. **Time Windows**: Use DATEDIFF constraints to limit pattern search scope - - Prevents matching across unrelated time periods - - Improves query performance - -### Best Practices - -1. **Always specify PARTITION BY** unless truly global pattern matching is needed -2. **Use ALL ROWS PER MATCH** for detailed forensics and debugging -3. **Include MATCH_NUMBER()** in MEASURES for unique match identification -4. **Capture start/end times** using FIRST() and LAST() functions -5. **Count each pattern variable** to understand match composition -6. **Use SKIP PAST LAST ROW** for non-overlapping matches (most common) -7. **Apply time window constraints** in DEFINE to ensure temporal relevance -8. 
**Use NULL checks with LAG/LEAD** to handle first/last rows in partition -9. **Use ABS(DATEDIFF(...))** for bidirectional time comparisons - -## SQL Dialect Notes - -The examples in this corpus appear to be written for **Snowflake SQL** dialect, evidenced by: - -- `DATEDIFF(MINS, ...)` function with MINS as first parameter -- `LATERAL FLATTEN` for JSON array expansion -- Snowflake-specific table references and system functions -- `p_occurs_since()` custom function for time filtering - -Key functions used: -- `DATEDIFF(unit, start, end)`: Calculate time difference -- `LAG(column, offset, default)`: Access previous row -- `LEAD(column, offset, default)`: Access next row -- `ABS()`: Absolute value -- `COALESCE()`: Return first non-null value -- `GET_PATH()`: Extract JSON path value - -## Conclusion - -The MATCH_RECOGNIZE clause provides a powerful, declarative way to perform pattern matching on ordered datasets. The syntax combines SQL's familiar structure with regular expression-like pattern matching, making it particularly effective for: - -- Temporal sequence analysis -- Security event correlation -- Fraud detection -- Process mining -- Behavioral analytics - -The key to effective use is: -1. Proper partitioning by entity -2. Correct temporal ordering -3. Well-defined pattern variables with time constraints -4. Comprehensive measures for analysis -5. Appropriate skip strategy for match handling - - - - diff --git a/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md b/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md deleted file mode 100644 index a5f157ee4ddc..000000000000 --- a/MATCH_RECOGNIZE_SYNTAX_FROM_EXAMPLES.md +++ /dev/null @@ -1,746 +0,0 @@ -# MATCH_RECOGNIZE Clause - Syntax Observed in Examples - -This document describes **only** the MATCH_RECOGNIZE syntax patterns actually present in the provided spreadsheet examples. No additional SQL standard features are included. 
- -## Overall Structure Observed - -Every MATCH_RECOGNIZE clause in the examples follows this structure: - -```sql -FROM table_name -MATCH_RECOGNIZE ( - [ PARTITION BY column ] - ORDER BY column ASC - MEASURES - measure_expression AS alias, - ... - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN ( pattern_expression ) - DEFINE - pattern_variable AS condition, - ... -) -``` - -## Clause Usage in Examples - -### Clauses Present in ALL Examples (18 out of 18) - -1. ✅ **ORDER BY** - Present in all 18 examples -2. ✅ **MEASURES** - Present in all 18 examples -3. ✅ **ALL ROWS PER MATCH** - Present in all 18 examples -4. ✅ **AFTER MATCH SKIP PAST LAST ROW** - Present in all 18 examples -5. ✅ **PATTERN** - Present in all 18 examples (required) -6. ✅ **DEFINE** - Present in all 18 examples (required) - -### Clauses Present in MOST Examples - -7. ✅ **PARTITION BY** - Present in 17 out of 18 examples - - Missing in: `secret_exposed_and_not_quarantined.yml` - -### Clauses NEVER Used in Examples - -- ❌ **ONE ROW PER MATCH** - Never used (all use ALL ROWS PER MATCH) -- ❌ **SUBSET** - Never used in any example - -## Detailed Clause Breakdown - -### 1. PARTITION BY - -**Usage**: 17 out of 18 examples use PARTITION BY - -**Observed Syntax**: -```sql -PARTITION BY single_column -``` - -**Examples from spreadsheet**: -```sql -PARTITION BY match_0_0 -PARTITION BY accountRegion -PARTITION BY field_name -``` - -**Notes**: -- Always partitions by exactly ONE column -- Never uses multiple columns -- One example omits PARTITION BY entirely (global matching) - -### 2. ORDER BY - -**Usage**: 18 out of 18 examples use ORDER BY - -**Observed Syntax**: -```sql -ORDER BY column ASC -``` - -**Examples from spreadsheet**: -```sql -ORDER BY p_event_time ASC -``` - -**Notes**: -- Always orders by exactly ONE column (always `p_event_time`) -- Always uses ASC (ascending) -- Never uses DESC -- Never uses multiple columns - -### 3. 
MEASURES - -**Usage**: 18 out of 18 examples use MEASURES - -**Observed Functions**: - -All examples use exactly this pattern: - -```sql -MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_variable_name.*) AS num_pattern_variable_name, - COUNT(pattern_variable_name2.*) AS num_pattern_variable_name2, - ... -``` - -**Functions Observed**: -1. `MATCH_NUMBER()` - Used in all 18 examples -2. `FIRST(column)` - Used in all 18 examples (always with `p_event_time`) -3. `LAST(column)` - Used in all 18 examples (always with `p_event_time`) -4. `COUNT(pattern.*)` - Used in all 18 examples (one or more per query) - -**Actual Examples**: - -```sql --- Example 1: Two pattern variables -MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, - COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances - --- Example 2: One pattern variable -MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_AWS_Console_Sign_In.*) AS num_pattern_AWS_Console_Sign_In - --- Example 3: Four pattern variables -MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_AWS_CloudTrail_SES_CheckSendQuota.*) AS num_pattern_AWS_CloudTrail_SES_CheckSendQuota, - COUNT(pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled.*) AS num_pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled, - COUNT(pattern_AWS_CloudTrail_SES_CheckIdentityVerifications.*) AS num_pattern_AWS_CloudTrail_SES_CheckIdentityVerifications, - COUNT(pattern_AWS_CloudTrail_SES_ListIdentities.*) AS num_pattern_AWS_CloudTrail_SES_ListIdentities -``` - -**Pattern**: -- Every example counts occurrences of each pattern variable defined in DEFINE clause -- Naming 
convention: `num_` + pattern variable name - -### 4. ALL ROWS PER MATCH - -**Usage**: 18 out of 18 examples - -**Observed Syntax**: -```sql -ALL ROWS PER MATCH -``` - -**Notes**: -- 100% of examples use this -- No examples use `ONE ROW PER MATCH` - -### 5. AFTER MATCH - -**Usage**: 18 out of 18 examples - -**Observed Syntax**: -```sql -AFTER MATCH SKIP PAST LAST ROW -``` - -**Notes**: -- 100% of examples use `SKIP PAST LAST ROW` -- No other skip strategies observed: - - Never uses `SKIP TO NEXT ROW` - - Never uses `SKIP TO FIRST variable` - - Never uses `SKIP TO LAST variable` - -### 6. PATTERN - -**Usage**: 18 out of 18 examples (required) - -**Observed Pattern Types**: - -#### Type 1: Sequential Pattern (11 examples) - -**Syntax**: -```sql -PATTERN (pattern_A{n,} pattern_B{n,}) -PATTERN (pattern_A{n,} pattern_B{n,} pattern_C{n,}) -``` - -**Examples from spreadsheet**: - -```sql --- Two-step sequence -PATTERN (pattern_AWS_EC2_StopInstances{1,} pattern_AWS_EC2_Startup_Script_Change{1,}) -PATTERN (pattern_AWS_IAM_CreateUser{1,} pattern_AWS_IAM_AttachAdminUserPolicy{1,}) -PATTERN (pattern_AWS_IAM_CreateRole{1,} pattern_AWS_IAM_AttachAdminRolePolicy{1,}) -PATTERN (pattern_AWS_IAM_Backdoor_User_Keys{1,} pattern_AWS_CloudTrail_UserAccessKeyAuth{1,}) -PATTERN (pattern_AWS_CloudTrail_LoginProfileCreatedOrModified{1,} pattern_AWS_Console_Login{1,}) -PATTERN (pattern_GCP_Cloud_Run_Service_Created{1,} pattern_GCP_Cloud_Run_Set_IAM_Policy{1,}) -PATTERN (pattern_Notion_Login{1,} pattern_Notion_AccountChange{1,}) -PATTERN (pattern_OneLogin_HighRiskFailedLogin{1,} pattern_OneLogin_Login{1,}) -PATTERN (pattern_Okta_Login_Without_Push_Marker{1,} pattern_Push_Security_Phishing_Attack{1,}) -PATTERN (pattern_Wiz_Alert_Passthrough{1,} pattern_AWS_VPC_SSHAllowedSignal{1,}) -PATTERN (pattern_Crowdstrike_NewUserCreated{1,} pattern_Crowdstrike_UserDeleted{1,}) - --- Three-step sequence -PATTERN (pattern_GCP_IAM_Tag_Enumeration{1,} pattern_GCP_Tag_Binding_Creation{1,} 
pattern_GCP_Privileged_Operation{1,}) -PATTERN (pattern_Snowflake_TempStageCreated{1,} pattern_Snowflake_CopyIntoStage{1,} pattern_Snowflake_FileDownloaded{1,}) - --- Sequence with minimum count > 1 -PATTERN (pattern_Snowflake_Stream_BruteForceByIp{5,} pattern_Snowflake_Stream_LoginSuccess{1,}) -``` - -#### Type 2: Single Pattern (Negative Detection) (4 examples) - -**Syntax**: -```sql -PATTERN (pattern_A{1,}) -``` - -**Examples from spreadsheet**: - -```sql -PATTERN (pattern_AWS_Console_Sign_In{1,}) -PATTERN (pattern_Retrieve_SSO_access_token{1,}) -PATTERN (pattern_Okta_Login_Success{1,}) -PATTERN (pattern_GitHub_Secret_Scanning_Alert_Created{1,}) -``` - -**Note**: These use negative conditions in DEFINE with LEAD or LAG to detect absence of expected follow-up events. - -#### Type 3: PERMUTE (Unordered) Pattern (2 examples) - -**Syntax**: -```sql -PATTERN (PERMUTE(pattern_A{n,}, pattern_B{n,}, ...)) -``` - -**Examples from spreadsheet**: - -```sql --- Absence detection: A present, B absent -PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) - --- All four patterns in any order -PATTERN (PERMUTE( - pattern_AWS_CloudTrail_SES_CheckSESSendingEnabled{1,}, - pattern_AWS_CloudTrail_SES_CheckSendQuota{1,}, - pattern_AWS_CloudTrail_SES_ListIdentities{1,}, - pattern_AWS_CloudTrail_SES_CheckIdentityVerifications{1,} -)) -``` - -**Quantifiers Observed**: - -| Quantifier | Meaning | Example Count | -|------------|---------|---------------| -| `{1,}` | One or more | 16 examples | -| `{5,}` | Five or more | 1 example | -| `{0,0}` | Zero (absence) | 1 example | - -**Quantifiers NOT Observed**: -- Never uses `{n}` (exactly n) -- Never uses `{n,m}` (between n and m) -- Never uses `+`, `*`, or `?` shorthand -- Never uses `|` (alternation) -- Never uses `()` grouping - -### 7. DEFINE - -**Usage**: 18 out of 18 examples (required) - -**Observed Pattern**: Every DEFINE clause defines one or more pattern variables with conditions. 
- -**Condition Types Observed**: - -#### Type 1: Simple Equality (All Examples) - -Every pattern variable starts with a simple equality check: - -```sql -pattern_variable_name AS p_rule_id = 'Rule.Name' -``` - -#### Type 2: Simple Equality ONLY (3 examples) - -Some pattern variables have ONLY the equality condition: - -```sql -pattern_AWS_IAM_CreateUser AS p_rule_id = 'AWS.IAM.CreateUser' -pattern_GCP_IAM_Tag_Enumeration AS p_rule_id = 'GCP.IAM.Tag.Enumeration' -pattern_Snowflake_TempStageCreated AS p_rule_id = 'Snowflake.TempStageCreated' -``` - -#### Type 3: Equality + LAG Time Constraint (Most Common) - -**Syntax Pattern**: -```sql -pattern_variable AS p_rule_id = 'Rule.Name' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= N) -``` - -**Actual Examples**: - -```sql --- 15 minutes -pattern_Notion_AccountChange AS p_rule_id = 'Notion.AccountChange' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15) - --- 30 minutes -pattern_Snowflake_Stream_LoginSuccess AS p_rule_id = 'Snowflake.Stream.LoginSuccess' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) - --- 60 minutes -pattern_AWS_IAM_AttachAdminUserPolicy AS p_rule_id = 'AWS.IAM.AttachAdminUserPolicy' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) - --- 90 minutes -pattern_AWS_EC2_Startup_Script_Change AS p_rule_id = 'AWS.EC2.Startup.Script.Change' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90) - --- 720 minutes (12 hours) -pattern_Crowdstrike_UserDeleted AS p_rule_id = 'Crowdstrike.UserDeleted' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 720) -``` - -**Time Windows Observed**: -- 15 minutes: 3 examples -- 30 minutes: 1 example -- 60 minutes: 6 examples -- 90 minutes: 2 
examples -- 120 minutes: 1 example -- 720 minutes: 1 example - -#### Type 4: Negative LAG Constraint (Absence Detection) - -**Syntax Pattern**: -```sql -pattern_variable AS p_rule_id = 'Current.Rule' - AND (LAG(p_rule_id, 1, '') != 'Previous.Rule' - OR (LAG(p_rule_id, 1, '') = 'Previous.Rule' - AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > N)) -``` - -**Actual Examples**: - -```sql --- AWS Console sign-in WITHOUT Okta SSO within 15 minutes -pattern_AWS_Console_Sign_In AS p_rule_id = 'AWS.Console.Sign-In' - AND (LAG(p_rule_id, 1, '') != 'Okta.SSO.to.AWS' - OR (LAG(p_rule_id, 1, '') = 'Okta.SSO.to.AWS' - AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 15)) - --- SSO token retrieval WITHOUT CLI prompt within 120 minutes -pattern_Retrieve_SSO_access_token AS p_rule_id = 'Retrieve.SSO.access.token' - AND (LAG(p_rule_id, 1, '') != 'Sign-in.with.AWS.CLI.prompt' - OR (LAG(p_rule_id, 1, '') = 'Sign-in.with.AWS.CLI.prompt' - AND ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) > 120)) -``` - -**Pattern**: Detects event A when event B did NOT occur immediately before, or occurred too long ago. 
- -#### Type 5: Negative LEAD Constraint (Absence Detection) - -**Syntax Pattern**: -```sql -pattern_variable AS p_rule_id = 'Current.Rule' - AND (LEAD(p_rule_id, 1, '') != 'Next.Rule' - OR (LEAD(p_rule_id, 1, '') = 'Next.Rule' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > N)) -``` - -**Actual Examples**: - -```sql --- Okta login WITHOUT Push Security within 60 minutes -pattern_Okta_Login_Success AS p_rule_id = 'Okta.Login.Success' - AND (LEAD(p_rule_id, 1, '') != 'Push.Security.Authorized.IdP.Login' - OR (LEAD(p_rule_id, 1, '') = 'Push.Security.Authorized.IdP.Login' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) - --- GitHub secret exposed WITHOUT quarantine within 60 minutes -pattern_GitHub_Secret_Scanning_Alert_Created AS p_rule_id = 'GitHub.Secret.Scanning.Alert.Created' - AND (LEAD(p_rule_id, 1, '') != 'AWS.CloudTrail.IAMCompromisedKeyQuarantine' - OR (LEAD(p_rule_id, 1, '') = 'AWS.CloudTrail.IAMCompromisedKeyQuarantine' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) -``` - -**Pattern**: Detects event A when event B does NOT occur immediately after, or occurs too far in the future. 
- -**Functions Used in DEFINE**: - -| Function | Usage Count | Purpose | -|----------|-------------|---------| -| `LAG(column, offset, default)` | 14 examples | Access previous row value | -| `LEAD(column, offset, default)` | 2 examples | Access next row value | -| `DATEDIFF(MINS, start, end)` | 16 examples | Calculate minute difference | -| `ABS(value)` | 16 examples | Absolute value for time gaps | - -**Functions NEVER Used**: -- `PREV()` - Never used (always use LAG instead) -- `FIRST()` - Never used in DEFINE (only in MEASURES) -- `LAST()` - Never used in DEFINE (only in MEASURES) - -## Complete Pattern Templates from Examples - -### Template 1: Sequential Two-Step Pattern (Most Common) - -```sql -FROM filter_data -MATCH_RECOGNIZE ( - PARTITION BY match_column - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_StepA.*) AS num_pattern_StepA, - COUNT(pattern_StepB.*) AS num_pattern_StepB - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_StepA{1,} pattern_StepB{1,}) - DEFINE - pattern_StepA AS p_rule_id = 'Rule.A', - pattern_StepB AS p_rule_id = 'Rule.B' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 60) -) -``` - -### Template 2: Sequential Three-Step Pattern - -```sql -FROM filter_data -MATCH_RECOGNIZE ( - PARTITION BY match_column - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_StepA.*) AS num_pattern_StepA, - COUNT(pattern_StepB.*) AS num_pattern_StepB, - COUNT(pattern_StepC.*) AS num_pattern_StepC - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_StepA{1,} pattern_StepB{1,} pattern_StepC{1,}) - DEFINE - pattern_StepA AS p_rule_id = 'Rule.A', - pattern_StepB AS p_rule_id = 'Rule.B' - AND (LAG(p_event_time, 1, NULL) is NULL - OR 
ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15), - pattern_StepC AS p_rule_id = 'Rule.C' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 15) -) -``` - -### Template 3: Absence Detection (Event Without Follow-up) - -```sql -FROM filter_data -MATCH_RECOGNIZE ( - PARTITION BY match_column - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_EventA.*) AS num_pattern_EventA - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_EventA{1,}) - DEFINE - pattern_EventA AS p_rule_id = 'Rule.A' - AND (LEAD(p_rule_id, 1, '') != 'Rule.B' - OR (LEAD(p_rule_id, 1, '') = 'Rule.B' - AND ABS(DATEDIFF(MINS, LEAD(p_event_time), p_event_time)) > 60)) -) -``` - -### Template 4: PERMUTE Pattern (Any Order) - -```sql -FROM filter_data -MATCH_RECOGNIZE ( - PARTITION BY match_column - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_EventA.*) AS num_pattern_EventA, - COUNT(pattern_EventB.*) AS num_pattern_EventB, - COUNT(pattern_EventC.*) AS num_pattern_EventC - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (PERMUTE(pattern_EventA{1,}, pattern_EventB{1,}, pattern_EventC{1,})) - DEFINE - pattern_EventA AS p_rule_id = 'Rule.A', - pattern_EventB AS p_rule_id = 'Rule.B', - pattern_EventC AS p_rule_id = 'Rule.C' -) -``` - -### Template 5: Threshold Pattern (N occurrences then success) - -```sql -FROM filter_data -MATCH_RECOGNIZE ( - PARTITION BY match_column - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_Failure.*) AS num_pattern_Failure, - COUNT(pattern_Success.*) AS num_pattern_Success - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_Failure{5,} 
pattern_Success{1,}) - DEFINE - pattern_Failure AS p_rule_id = 'Rule.Failure', - pattern_Success AS p_rule_id = 'Rule.Success' - AND (LAG(p_event_time, 1, NULL) is NULL - OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) -) -``` - -## Summary Statistics - -### By Clause Usage - -| Clause | Usage | Notes | -|--------|-------|-------| -| ORDER BY | 18/18 (100%) | Always `p_event_time ASC` | -| MEASURES | 18/18 (100%) | Always includes MATCH_NUMBER(), FIRST(), LAST(), COUNT() | -| ALL ROWS PER MATCH | 18/18 (100%) | No examples use ONE ROW PER MATCH | -| AFTER MATCH | 18/18 (100%) | Always `SKIP PAST LAST ROW` | -| PATTERN | 18/18 (100%) | Required clause | -| DEFINE | 18/18 (100%) | Required clause | -| PARTITION BY | 17/18 (94%) | One example omits it | -| SUBSET | 0/18 (0%) | Never used | - -### By Pattern Type - -| Pattern Type | Count | Percentage | -|--------------|-------|------------| -| Sequential (2 steps) | 10 | 56% | -| Sequential (3 steps) | 2 | 11% | -| Single pattern (absence detection) | 4 | 22% | -| PERMUTE (unordered) | 2 | 11% | - -### By DEFINE Condition Type - -| Condition Type | Approx. Count | Percentage | -|----------------|---------------|------------| -| Simple equality only | ~5 | ~28% | -| Equality + LAG time constraint | ~11 | ~61% | -| Equality + negative LAG | ~2 | ~11% | -| Equality + negative LEAD | ~2 | ~11% | - -Note: Some examples have multiple pattern variables with different condition types, so percentages don't sum to 100%. 
- -## Naming Conventions Observed - -### Pattern Variables -All pattern variables follow this naming convention: -``` -pattern_ -``` - -Examples: -- `pattern_AWS_IAM_CreateUser` -- `pattern_GCP_Cloud_Run_Service_Created` -- `pattern_Okta_Login_Success` -- `pattern_Snowflake_TempStageCreated` - -### Measure Aliases -All measure aliases follow consistent naming: -- Match identifier: `match_number` -- Start time: `start_time` -- End time: `end_time` -- Count pattern: `num_` (without "pattern_" prefix) - -### Column Names -The examples use consistent column naming: -- Event time: `p_event_time` -- Rule ID: `p_rule_id` -- Match key: `match_0_0`, `accountRegion`, `field_name`, `empty_match` - -## Complete Example Breakdown - -### Example 1: aws_cloudtrail_stopinstance_followed_by_modifyinstanceattributes.yml - -```sql -MATCH_RECOGNIZE ( - PARTITION BY match_0_0 - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_AWS_EC2_Startup_Script_Change.*) AS num_pattern_AWS_EC2_Startup_Script_Change, - COUNT(pattern_AWS_EC2_StopInstances.*) AS num_pattern_AWS_EC2_StopInstances - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_AWS_EC2_StopInstances{1,} pattern_AWS_EC2_Startup_Script_Change{1,}) - DEFINE - pattern_AWS_EC2_Startup_Script_Change AS p_rule_id = 'AWS.EC2.Startup.Script.Change' - AND (LAG(p_event_time, 1, NULL) is NULL OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 90), - pattern_AWS_EC2_StopInstances AS p_rule_id = 'AWS.EC2.StopInstances' -) -``` - -**Pattern Type**: Sequential two-step -**Time Window**: 90 minutes -**Purpose**: Detect EC2 instance stop followed by startup script modification - -### Example 2: github_advanced_security_change_not_followed_by_repo_archived.yml - -```sql -MATCH_RECOGNIZE ( - PARTITION BY field_name - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS 
start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_GitHub_Advanced_Security_Change.*) AS num_pattern_GitHub_Advanced_Security_Change, - COUNT(pattern_Github_Repo_Archived.*) AS num_pattern_Github_Repo_Archived - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (PERMUTE(pattern_GitHub_Advanced_Security_Change{1,}, pattern_Github_Repo_Archived{0,0})) - DEFINE - pattern_GitHub_Advanced_Security_Change AS p_rule_id = 'GitHub.Advanced.Security.Change', - pattern_Github_Repo_Archived AS p_rule_id = 'Github.Repo.Archived' -) -``` - -**Pattern Type**: PERMUTE with absence detection (`{0,0}`) -**Time Window**: None -**Purpose**: Detect security change WITHOUT repo being archived -**Special**: Uses `HAVING num_pattern_Github_Repo_Archived = 0` after MATCH_RECOGNIZE - -### Example 3: snowflake_potential_brute_force_success.yml - -```sql -MATCH_RECOGNIZE ( - PARTITION BY match_0_0 - ORDER BY p_event_time ASC - MEASURES - MATCH_NUMBER() AS match_number, - FIRST(p_event_time) AS start_time, - LAST(p_event_time) AS end_time, - COUNT(pattern_Snowflake_Stream_BruteForceByIp.*) AS num_pattern_Snowflake_Stream_BruteForceByIp, - COUNT(pattern_Snowflake_Stream_LoginSuccess.*) AS num_pattern_Snowflake_Stream_LoginSuccess - ALL ROWS PER MATCH - AFTER MATCH SKIP PAST LAST ROW - PATTERN (pattern_Snowflake_Stream_BruteForceByIp{5,} pattern_Snowflake_Stream_LoginSuccess{1,}) - DEFINE - pattern_Snowflake_Stream_BruteForceByIp AS p_rule_id = 'Snowflake.Stream.BruteForceByIp', - pattern_Snowflake_Stream_LoginSuccess AS p_rule_id = 'Snowflake.Stream.LoginSuccess' - AND (LAG(p_event_time, 1, NULL) is NULL OR ABS(DATEDIFF(MINS, LAG(p_event_time), p_event_time)) <= 30) -) -``` - -**Pattern Type**: Threshold-based (minimum 5 failures) -**Time Window**: 30 minutes -**Purpose**: Detect at least 5 brute force attempts followed by successful login - -## Functions and Operators Summary - -### Functions Used in MEASURES -- `MATCH_NUMBER()` - 18/18 examples -- `FIRST(column)` - 
18/18 examples -- `LAST(column)` - 18/18 examples -- `COUNT(pattern.*)` - 18/18 examples - -### Functions Used in DEFINE -- `LAG(column, offset, default)` - 14/18 examples -- `LEAD(column, offset, default)` - 2/18 examples -- `DATEDIFF(MINS, start, end)` - 16/18 examples -- `ABS(value)` - 16/18 examples - -### Operators Used -- `=` (equality) - All examples -- `!=` (inequality) - 4 examples (negative detection) -- `AND` - All examples with complex conditions -- `OR` - All examples with LAG/LEAD time checks -- `<=` (less than or equal) - Positive time constraints -- `>` (greater than) - Negative time constraints - -### Operators NOT Observed -- `<` (less than) -- `>=` (greater than or equal) -- `BETWEEN` -- `IN` -- `LIKE` -- Arithmetic operators (+, -, *, /) - -## Snowflake-Specific Syntax - -The examples appear to use Snowflake SQL dialect: - -1. `DATEDIFF(MINS, start_time, end_time)` - Snowflake syntax for date difference -2. `LATERAL FLATTEN` - Snowflake JSON array processing (in filter CTE, before MATCH_RECOGNIZE) -3. `p_occurs_since('N minutes')` - Custom function for time filtering (in filter CTE) -4. 
`GET_PATH()` - Snowflake JSON path extraction (in filter CTE) - -## Conclusion - -Based on the 18 examples in the spreadsheet, the MATCH_RECOGNIZE syntax used follows a very consistent pattern: - -**Always Present**: -- `PARTITION BY` (17/18 examples, always single column) -- `ORDER BY p_event_time ASC` (always) -- `MEASURES` with `MATCH_NUMBER()`, `FIRST()`, `LAST()`, `COUNT()` (always) -- `ALL ROWS PER MATCH` (always, never ONE ROW PER MATCH) -- `AFTER MATCH SKIP PAST LAST ROW` (always, no other strategies) -- `PATTERN` with `{1,}` or `{5,}` quantifiers (always) -- `DEFINE` with equality conditions, often with LAG/LEAD time constraints (always) - -**Never Present**: -- `SUBSET` clause -- `ONE ROW PER MATCH` -- Other `AFTER MATCH` strategies -- Quantifiers: `{n}`, `{n,m}` (except `{0,0}`), `+`, `*`, `?` -- Pattern operators: `|`, grouping with `()` -- `PREV()` function - -**Rarely Present**: -- `PERMUTE()` (2/18 examples) -- `LEAD()` (2/18 examples, mostly use LAG) -- Quantifier `{5,}` (1 example, threshold detection) -- Quantifier `{0,0}` (1 example, absence detection) - - - - diff --git a/identifier-clause-comparison-v2.csv b/identifier-clause-comparison-v2.csv deleted file mode 100644 index 89c33a385ad9..000000000000 --- a/identifier-clause-comparison-v2.csv +++ /dev/null @@ -1,274 +0,0 @@ -Query#,SQL Text,Master,identifier-clause,identifier-clause-legacy -1,SET hivevar:colname = 'c',SUCCESS,SUCCESS,SUCCESS -2,SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1),SUCCESS,SUCCESS,SUCCESS -3,SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -4,SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -5,SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -6,SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`),SUCCESS,SUCCESS,SUCCESS -7,SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``),SUCCESS,SUCCESS,SUCCESS -8,SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS 
-9,CREATE SCHEMA IF NOT EXISTS s,SUCCESS,SUCCESS,SUCCESS -10,CREATE TABLE s.tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -11,USE SCHEMA s,SUCCESS,SUCCESS,SUCCESS -12,INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1),SUCCESS,SUCCESS,SUCCESS -13,DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION -14,UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 -15,"MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 - WHEN MATCHED THEN UPDATE SET c1 = 3",_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 -16,SELECT * FROM IDENTIFIER('tab'),SUCCESS,SUCCESS,SUCCESS -17,SELECT * FROM IDENTIFIER('s.tab'),SUCCESS,SUCCESS,SUCCESS -18,SELECT * FROM IDENTIFIER('`s`.`tab`'),SUCCESS,SUCCESS,SUCCESS -19,SELECT * FROM IDENTIFIER('t' || 'a' || 'b'),SUCCESS,SUCCESS,SUCCESS -20,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS -21,DROP TABLE s.tab,SUCCESS,SUCCESS,SUCCESS -22,DROP SCHEMA s,SUCCESS,SUCCESS,SUCCESS -23,"SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)",SUCCESS,SUCCESS,SUCCESS -24,SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -25,"SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1)",SUCCESS,SUCCESS,SUCCESS -26,CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -27,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -28,CREATE SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS -29,USE identifier_clauses,SUCCESS,SUCCESS,SUCCESS -30,CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -31,DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab'),SUCCESS,SUCCESS,SUCCESS -32,CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -33,REPLACE TABLE IDENTIFIER('identifier_clauses.' 
|| 'tab')(c1 INT) USING CSV,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION -34,CACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -35,UNCACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -36,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -37,USE default,SUCCESS,SUCCESS,SUCCESS -38,DROP SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS -39,CREATE TABLE tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -40,INSERT INTO tab VALUES (1),SUCCESS,SUCCESS,SUCCESS -41,SELECT c1 FROM tab,SUCCESS,SUCCESS,SUCCESS -42,DESCRIBE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -43,ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS,SUCCESS,SUCCESS,SUCCESS -44,ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT,SUCCESS,SUCCESS,SUCCESS -45,SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -46,SHOW COLUMNS FROM IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -47,COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello',SUCCESS,SUCCESS,SUCCESS -48,REFRESH TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -49,REPAIR TABLE IDENTIFIER('ta' || 'b'),NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE -50,TRUNCATE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -51,DROP TABLE IF EXISTS tab,SUCCESS,SUCCESS,SUCCESS -52,CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS -53,SELECT * FROM v,SUCCESS,SUCCESS,SUCCESS -54,ALTER VIEW IDENTIFIER('v') AS VALUES(2),SUCCESS,SUCCESS,SUCCESS -55,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS -56,CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS -57,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS -58,CREATE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -59,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue'),SUCCESS,SUCCESS,SUCCESS -60,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 
'someloc',SUCCESS,SUCCESS,SUCCESS -61,COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment',SUCCESS,SUCCESS,SUCCESS -62,DESCRIBE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -63,SHOW TABLES IN IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -64,SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello',SUCCESS,SUCCESS,SUCCESS -65,USE IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -66,SHOW CURRENT SCHEMA,SUCCESS,SUCCESS,SUCCESS -67,USE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -68,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS -69,DROP SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -70,CREATE SCHEMA ident,SUCCESS,SUCCESS,SUCCESS -71,CREATE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS -72,DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -73,REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -74,DROP FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -75,DROP SCHEMA ident,SUCCESS,SUCCESS,SUCCESS -76,CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS -77,DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg'),SUCCESS,SUCCESS,SUCCESS -78,DECLARE var = 'sometable',SUCCESS,SUCCESS,SUCCESS -79,CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -80,SET VAR var = 'c1',SUCCESS,SUCCESS,SUCCESS -81,SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -82,SET VAR var = 'some',SUCCESS,SUCCESS,SUCCESS -83,DROP TABLE IDENTIFIER(var || 'table'),SUCCESS,SUCCESS,SUCCESS -84,SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`),PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -85,SELECT IDENTIFIER('') FROM VALUES(1) AS T(``),PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT -86,VALUES(IDENTIFIER(CAST(NULL AS STRING))),NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL -87,VALUES(IDENTIFIER(1)),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -88,"VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1)))",NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT -89,SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1),UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE -90,CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -91,CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -92,CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -93,DROP TABLE IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE 
-94,DROP VIEW IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -95,COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello',REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -96,VALUES(IDENTIFIER(1)()),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -97,VALUES(IDENTIFIER('a.b.c.d')()),IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS -98,CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE -99,DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg'),INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME -100,CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1),TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS -101,"create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -102,"cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -103,"create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -104,insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1,SUCCESS,SUCCESS,SUCCESS -105,drop view v1,SUCCESS,SUCCESS,SUCCESS -106,drop table t1,SUCCESS,SUCCESS,SUCCESS -107,drop table t2,SUCCESS,SUCCESS,SUCCESS -108,DECLARE agg = 'max',SUCCESS,SUCCESS,SUCCESS -109,DECLARE col = 'c1',SUCCESS,SUCCESS,SUCCESS -110,DECLARE tab = 'T',SUCCESS,SUCCESS,SUCCESS -111,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), - T(c1, c2) AS 
(VALUES ('a', 'b'), ('c', 'd')) -SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab)",SUCCESS,SUCCESS,SUCCESS -112,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), - T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) -SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T')",SUCCESS,SUCCESS,SUCCESS -113,"WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) -SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC')",SUCCESS,SUCCESS,SUCCESS -114,SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -115,SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -116,SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1),UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION -117,"SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -118,"SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -119,SELECT * FROM s.IDENTIFIER('tab'),INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -120,SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab'),PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -121,SELECT * FROM IDENTIFIER('s').tab,PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -122,SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -123,SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -124,SELECT 1 AS IDENTIFIER('col1'),NEW,SUCCESS,PARSE_SYNTAX_ERROR -125,"SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR 
-126,WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -127,CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -128,SELECT c1 FROM v,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -129,CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV,PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -130,INSERT INTO tab(IDENTIFIER('c1')) VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -131,SELECT c1 FROM tab,SUCCESS,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -132,ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -133,SELECT col1 FROM tab,NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,TABLE_OR_VIEW_NOT_FOUND -134,ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT,NEW,SUCCESS,PARSE_SYNTAX_ERROR -135,SELECT c2 FROM tab,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -136,ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -137,ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed'),NEW,SUCCESS,PARSE_SYNTAX_ERROR -138,SELECT * FROM tab_renamed,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -139,CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV,NEW,SUCCESS,PARSE_SYNTAX_ERROR -140,DROP TABLE IF EXISTS test_col_with_dot,NEW,SUCCESS,SUCCESS -141,"SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -142,SELECT 1 AS IDENTIFIER('col1.col2'),NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -143,"CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -144,SHOW VIEWS IN IDENTIFIER('default'),NEW,SUCCESS,SUCCESS -145,SHOW PARTITIONS IDENTIFIER('test_show'),NEW,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY -146,SHOW CREATE TABLE IDENTIFIER('test_show'),NEW,SUCCESS,SUCCESS 
-147,DROP TABLE test_show,NEW,SUCCESS,SUCCESS -148,CREATE TABLE test_desc(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS -149,DESCRIBE TABLE IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -150,DESCRIBE FORMATTED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -151,DESCRIBE EXTENDED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -152,DESC IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -153,DROP TABLE test_desc,NEW,SUCCESS,SUCCESS -154,"CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -155,COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment',NEW,SUCCESS,SUCCESS -156,ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment',NEW,SUCCESS,PARSE_SYNTAX_ERROR -157,DROP TABLE test_comment,NEW,SUCCESS,SUCCESS -158,CREATE SCHEMA test_schema,NEW,SUCCESS,SUCCESS -159,CREATE TABLE test_schema.test_table(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS -160,ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS,NEW,SUCCESS,SUCCESS -161,REFRESH TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -162,DESCRIBE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -163,SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -164,DROP TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -165,DROP SCHEMA test_schema,NEW,SUCCESS,SUCCESS -166,DECLARE IDENTIFIER('my_var') = 'value',NEW,SUCCESS,SUCCESS -167,SET VAR IDENTIFIER('my_var') = 'new_value',NEW,SUCCESS,PARSE_SYNTAX_ERROR -168,SELECT IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS -169,DROP TEMPORARY VARIABLE IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS -170,"CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) -RETURNS INT -RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR -171,"SELECT test_udf(5, 'hello')",NEW,SUCCESS,UNRESOLVED_ROUTINE -172,DROP TEMPORARY FUNCTION test_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND -173,"CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) 
-RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) -RETURN SELECT IDENTIFIER('input_val'), 'result'",NEW,SUCCESS,PARSE_SYNTAX_ERROR -174,SELECT * FROM test_table_udf(42),NEW,SUCCESS,UNRESOLVABLE_TABLE_VALUED_FUNCTION -175,DROP TEMPORARY FUNCTION test_table_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND -176,"BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -177,LEAVE IDENTIFIER('loop_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -178,END LOOP loop_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -179,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -180,"BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -181,SELECT x,NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -182,END IDENTIFIER('block_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -183,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -184,"BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -185,"IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -186,END WHILE while_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -187,SELECT IDENTIFIER('counter'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -188,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -189,"BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -190,"repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -191,"UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label')",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -192,SELECT IDENTIFIER('cnt'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -193,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -194,"BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT 
row.c1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -195,END FOR IDENTIFIER('for_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -196,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -197,EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab,NEW,INVALID_EXTRACT_BASE_FIELD_TYPE,INVALID_EXTRACT_BASE_FIELD_TYPE -198,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' - USING 'c1' AS col1",NEW,SUCCESS,SUCCESS -199,"CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -200,"INSERT INTO integration_test VALUES (1, 'a'), (2, 'b')",NEW,SUCCESS,SUCCESS -201,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table",NEW,SUCCESS,SUCCESS -202,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' - USING 'c' AS prefix",NEW,SUCCESS,SUCCESS -203,"EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' - USING 'c1' AS col, 1 AS val",NEW,SUCCESS,SUCCESS -204,"CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV",NEW,SUCCESS,SUCCESS -205,"INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y')",NEW,SUCCESS,SUCCESS -206,"EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' - USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col",NEW,SUCCESS,PARSE_SYNTAX_ERROR -207,"EXECUTE IMMEDIATE - 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' - USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord",NEW,SUCCESS,SUCCESS -208,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' - USING 'c' AS prefix, 'count' AS agg, 'c1' AS col",NEW,SUCCESS,SUCCESS -209,"EXECUTE 
IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' - USING 'c1' AS col1, 'c2' AS col2",NEW,SUCCESS,SUCCESS -210,"EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' - USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2",NEW,SUCCESS,PARSE_SYNTAX_ERROR -211,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -212,"EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' - USING 'my_cte' AS cte_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR -213,"EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' - USING 'test_view' AS view_name, 'test_col' AS col_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR -214,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' - USING 'test_col' AS col, 'test_view' AS view",NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -215,DROP VIEW test_view,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -216,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' - USING 'integration_test' AS tab, 'c4' AS new_col",NEW,SUCCESS,PARSE_SYNTAX_ERROR -217,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' - USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col",NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -218,"EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' - USING 'mykey' AS key, 42 AS val",NEW,SUCCESS,PARSE_SYNTAX_ERROR -219,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' - USING 't' AS alias",NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,PARSE_SYNTAX_ERROR 
-220,"EXECUTE IMMEDIATE - 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab",NEW,SUCCESS,SUCCESS -221,"EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -222,SELECT IDENTIFIER(:var_name) AS result,NEW,UNBOUND_SQL_PARAMETER,UNBOUND_SQL_PARAMETER -223,"END' - USING 'my_variable' AS var_name, 100 AS var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -224,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias",NEW,SUCCESS,SUCCESS -225,"EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -226,DROP TABLE integration_test,NEW,SUCCESS,SUCCESS -227,DROP TABLE integration_test2,NEW,SUCCESS,SUCCESS diff --git a/identifier-clause-comparison-v2.md b/identifier-clause-comparison-v2.md deleted file mode 100644 index 8e8fe11c915c..000000000000 --- a/identifier-clause-comparison-v2.md +++ /dev/null @@ -1,452 +0,0 @@ -# IDENTIFIER Clause Test Comparison (v2) - -Comprehensive comparison of IDENTIFIER clause behavior across different modes. 
- -- **Total Tests**: 227 -- **Tests from Master**: 128 -- **New Tests Added**: 99 -- **Tests Changed from Master**: 13 -- **Tests with Legacy Mode Differences**: 47 - -## Legend - -- **Query#**: Sequential test number -- **SQL Text**: The SQL query being tested -- **Master**: Result from master branch (before identifier-lite changes) -- **identifier-clause**: Result with identifier-lite enabled (default mode, current) -- **identifier-clause-legacy**: Result with `spark.sql.legacy.identifierClause=true` - -### Result Values - -- `SUCCESS`: Query executed successfully -- ``: Query failed with the specified error class -- `NEW`: Test did not exist in master baseline - ---- - -## Full Test Results - -| Query# | SQL Text | Master | identifier-clause | identifier-clause-legacy | -|--------|----------|--------|-------------------|--------------------------| -| 1 | SET hivevar:colname = 'c' | SUCCESS | SUCCESS | SUCCESS | -| 2 | SELECT IDENTIFIER(${colname} \|\| '_1') FROM VALUES(1) AS T(c_1) | SUCCESS | SUCCESS | SUCCESS | -| 3 | SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 4 | SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 5 | SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 6 | SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`) | SUCCESS | SUCCESS | SUCCESS | -| 7 | SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) | SUCCESS | SUCCESS | SUCCESS | -| 8 | SELECT IDENTIFIER('c' \|\| '1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 9 | CREATE SCHEMA IF NOT EXISTS s | SUCCESS | SUCCESS | SUCCESS | -| 10 | CREATE TABLE s.tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 11 | USE SCHEMA s | SUCCESS | SUCCESS | SUCCESS | -| 12 | INSERT INTO IDENTIFIER('ta' \|\| 'b') VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 13 | DELETE FROM IDENTIFIER('ta' \|\| 'b') WHERE 1=0 | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | 
UNSUPPORTED_FEATURE.TABLE_OPERATION | -| 14 | UPDATE IDENTIFIER('ta' \|\| 'b') SET c1 = 2 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | -| 15 | MERGE INTO IDENTIFIER('ta' \|\| 'b') AS t USING IDENTIFIER('ta' \|\| 'b') AS s ON s.c1 = t.c1 WHEN MATCHED THEN UPD... | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | -| 16 | SELECT * FROM IDENTIFIER('tab') | SUCCESS | SUCCESS | SUCCESS | -| 17 | SELECT * FROM IDENTIFIER('s.tab') | SUCCESS | SUCCESS | SUCCESS | -| 18 | SELECT * FROM IDENTIFIER('`s`.`tab`') | SUCCESS | SUCCESS | SUCCESS | -| 19 | SELECT * FROM IDENTIFIER('t' \|\| 'a' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 20 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | -| 21 | DROP TABLE s.tab | SUCCESS | SUCCESS | SUCCESS | -| 22 | DROP SCHEMA s | SUCCESS | SUCCESS | SUCCESS | -| 23 | SELECT IDENTIFIER('COAL' \|\| 'ESCE')(NULL, 1) | SUCCESS | SUCCESS | SUCCESS | -| 24 | SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 25 | SELECT * FROM IDENTIFIER('ra' \|\| 'nge')(0, 1) | SUCCESS | SUCCESS | SUCCESS | -| 26 | CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 27 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 28 | CREATE SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 29 | USE identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 30 | CREATE TABLE IDENTIFIER('ta' \|\| 'b')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 31 | DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' \|\| 'tab') | SUCCESS | SUCCESS | SUCCESS | -| 32 | CREATE TABLE IDENTIFIER('identifier_clauses.' \|\| 'tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 33 | REPLACE TABLE IDENTIFIER('identifier_clauses.' 
\|\| 'tab')(c1 INT) USING CSV | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | -| 34 | CACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 35 | UNCACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 36 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 37 | USE default | SUCCESS | SUCCESS | SUCCESS | -| 38 | DROP SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 39 | CREATE TABLE tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 40 | INSERT INTO tab VALUES (1) | SUCCESS | SUCCESS | SUCCESS | -| 41 | SELECT c1 FROM tab | SUCCESS | SUCCESS | SUCCESS | -| 42 | DESCRIBE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 43 | ANALYZE TABLE IDENTIFIER('ta' \|\| 'b') COMPUTE STATISTICS | SUCCESS | SUCCESS | SUCCESS | -| 44 | ALTER TABLE IDENTIFIER('ta' \|\| 'b') ADD COLUMN c2 INT | SUCCESS | SUCCESS | SUCCESS | -| 45 | SHOW TBLPROPERTIES IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 46 | SHOW COLUMNS FROM IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 47 | COMMENT ON TABLE IDENTIFIER('ta' \|\| 'b') IS 'hello' | SUCCESS | SUCCESS | SUCCESS | -| 48 | REFRESH TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 49 | REPAIR TABLE IDENTIFIER('ta' \|\| 'b') | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | -| 50 | TRUNCATE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 51 | DROP TABLE IF EXISTS tab | SUCCESS | SUCCESS | SUCCESS | -| 52 | CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 53 | SELECT * FROM v | SUCCESS | SUCCESS | SUCCESS | -| 54 | ALTER VIEW IDENTIFIER('v') AS VALUES(2) | SUCCESS | SUCCESS | SUCCESS | -| 55 | DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | -| 56 | CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 57 
| DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | -| 58 | CREATE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 59 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET PROPERTIES (somekey = 'somevalue') | SUCCESS | SUCCESS | SUCCESS | -| 60 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET LOCATION 'someloc' | SUCCESS | SUCCESS | SUCCESS | -| 61 | COMMENT ON SCHEMA IDENTIFIER('id' \|\| 'ent') IS 'some comment' | SUCCESS | SUCCESS | SUCCESS | -| 62 | DESCRIBE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 63 | SHOW TABLES IN IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 64 | SHOW TABLE EXTENDED IN IDENTIFIER('id' \|\| 'ent') LIKE 'hello' | SUCCESS | SUCCESS | SUCCESS | -| 65 | USE IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 66 | SHOW CURRENT SCHEMA | SUCCESS | SUCCESS | SUCCESS | -| 67 | USE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 68 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | -| 69 | DROP SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 70 | CREATE SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | -| 71 | CREATE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | -| 72 | DESCRIBE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 73 | REFRESH FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 74 | DROP FUNCTION IDENTIFIER('ident.' 
\|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 75 | DROP SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | -| 76 | CREATE TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | -| 77 | DROP TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 78 | DECLARE var = 'sometable' | SUCCESS | SUCCESS | SUCCESS | -| 79 | CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 80 | SET VAR var = 'c1' | SUCCESS | SUCCESS | SUCCESS | -| 81 | SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 82 | SET VAR var = 'some' | SUCCESS | SUCCESS | SUCCESS | -| 83 | DROP TABLE IDENTIFIER(var \|\| 'table') | SUCCESS | SUCCESS | SUCCESS | -| 84 | SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 85 | SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | -| 86 | VALUES(IDENTIFIER(CAST(NULL AS STRING))) | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | -| 87 | VALUES(IDENTIFIER(1)) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 88 | VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | -| 89 | SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | -| 90 | CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 91 | CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 92 | CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) | 
REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 93 | DROP TABLE IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 94 | DROP VIEW IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 95 | COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 96 | VALUES(IDENTIFIER(1)()) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 97 | VALUES(IDENTIFIER('a.b.c.d')()) | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | -| 98 | CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | -| 99 | DROP TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | -| 100 | CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | -| 101 | create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 102 | cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 103 | create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 104 | insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 
| SUCCESS | SUCCESS | SUCCESS | -| 105 | drop view v1 | SUCCESS | SUCCESS | SUCCESS | -| 106 | drop table t1 | SUCCESS | SUCCESS | SUCCESS | -| 107 | drop table t2 | SUCCESS | SUCCESS | SUCCESS | -| 108 | DECLARE agg = 'max' | SUCCESS | SUCCESS | SUCCESS | -| 109 | DECLARE col = 'c1' | SUCCESS | SUCCESS | SUCCESS | -| 110 | DECLARE tab = 'T' | SUCCESS | SUCCESS | SUCCESS | -| 111 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER(agg)(ID... | SUCCESS | SUCCESS | SUCCESS | -| 112 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER('max')(... | SUCCESS | SUCCESS | SUCCESS | -| 113 | WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' \|\| 'BC') | SUCCESS | SUCCESS | SUCCESS | -| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 116 | SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | -| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 121 | SELECT * FROM IDENTIFIER('s').tab | 
PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 124 | SELECT 1 AS IDENTIFIER('col1') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 128 | SELECT c1 FROM v | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 131 | SELECT c1 FROM tab | SUCCESS | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 133 | SELECT col1 FROM tab | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | -| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 135 | SELECT c2 FROM tab | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 137 | 
ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 138 | SELECT * FROM tab_renamed | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 140 | DROP TABLE IF EXISTS test_col_with_dot | **NEW** | SUCCESS | SUCCESS | -| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 142 | SELECT 1 AS IDENTIFIER('col1.col2') | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 144 | SHOW VIEWS IN IDENTIFIER('default') | **NEW** | SUCCESS | SUCCESS | -| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | **NEW** | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | -| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | **NEW** | SUCCESS | SUCCESS | -| 147 | DROP TABLE test_show | **NEW** | SUCCESS | SUCCESS | -| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 152 | DESC IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 153 | DROP TABLE test_desc | **NEW** | SUCCESS | SUCCESS | -| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | **NEW** | SUCCESS | SUCCESS | -| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 157 | DROP TABLE 
test_comment | **NEW** | SUCCESS | SUCCESS | -| 158 | CREATE SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | -| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | **NEW** | SUCCESS | SUCCESS | -| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 165 | DROP SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | -| 166 | DECLARE IDENTIFIER('my_var') = 'value' | **NEW** | SUCCESS | SUCCESS | -| 167 | SET VAR IDENTIFIER('my_var') = 'new_value' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 168 | SELECT IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | -| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | -| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS INT RETURN IDENTIFI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 171 | SELECT test_udf(5, 'hello') | **NEW** | **SUCCESS** | **UNRESOLVED_ROUTINE** | -| 172 | DROP TEMPORARY FUNCTION test_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIE... 
| **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 174 | SELECT * FROM test_table_udf(42) | **NEW** | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | -| 175 | DROP TEMPORARY FUNCTION test_table_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 177 | LEAVE IDENTIFIER('loop_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 178 | END LOOP loop_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 179 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 181 | SELECT x | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 182 | END IDENTIFIER('block_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 183 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('counter') + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 186 | END WHILE while_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 187 | SELECT IDENTIFIER('counter') | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 188 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 192 | SELECT IDENTIFIER('cnt') | **NEW** | 
UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 193 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 195 | END FOR IDENTIFIER('for_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 196 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | **NEW** | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | -| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' USING 'c1' AS col1 | **NEW** | SUCCESS | SUCCESS | -| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | **NEW** | SUCCESS | SUCCESS | -| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, 'integration_test' AS... | **NEW** | SUCCESS | SUCCESS | -| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' USING 'c' AS ... | **NEW** | SUCCESS | SUCCESS | -| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS col, 1 AS val | **NEW** | SUCCESS | SUCCESS | -| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | **NEW** | SUCCESS | SUCCESS | -| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' USIN... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) O... 
| **NEW** | SUCCESS | SUCCESS | -| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP B... | **NEW** | SUCCESS | SUCCESS | -| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' USING 'c1' AS... | **NEW** | SUCCESS | SUCCESS | -| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' USING ... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) ... | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' USING 'my_ct... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' USI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'test_view' AS view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 215 | DROP VIEW test_view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'integration_test' AS ta... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' USING '... 
| **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 AS val | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 219 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' USING 't' AS alias | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **PARSE_SYNTAX_ERROR** | -| 220 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIF... | **NEW** | SUCCESS | SUCCESS | -| 221 | EXECUTE IMMEDIATE 'BEGIN DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 222 | SELECT IDENTIFIER(:var_name) AS result | **NEW** | UNBOUND_SQL_PARAMETER | UNBOUND_SQL_PARAMETER | -| 223 | END' USING 'my_variable' AS var_name, 100 AS var_value | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 224 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' ... 
| **NEW** | SUCCESS | SUCCESS | -| 225 | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'default' AS schema, 'col1' AS col | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 226 | DROP TABLE integration_test | **NEW** | SUCCESS | SUCCESS | -| 227 | DROP TABLE integration_test2 | **NEW** | SUCCESS | SUCCESS | - ---- - -## New Tests Added - -### 99 New Tests - -| Query# | SQL Text | identifier-clause | identifier-clause-legacy | -|--------|----------|-------------------|--------------------------| -| 124 | SELECT 1 AS IDENTIFIER('col1') | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 128 | SELECT c1 FROM v | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 133 | SELECT col1 FROM tab | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | -| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 135 | SELECT c2 FROM tab | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 137 | ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 138 | SELECT * FROM tab_renamed | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 140 | DROP TABLE IF EXISTS test_col_with_dot | SUCCESS | SUCCESS | -| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 142 | SELECT 1 AS 
IDENTIFIER('col1.col2') | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | -| 144 | SHOW VIEWS IN IDENTIFIER('default') | SUCCESS | SUCCESS | -| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | -| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | SUCCESS | SUCCESS | -| 147 | DROP TABLE test_show | SUCCESS | SUCCESS | -| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | SUCCESS | SUCCESS | -| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | SUCCESS | SUCCESS | -| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | SUCCESS | SUCCESS | -| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | SUCCESS | SUCCESS | -| 152 | DESC IDENTIFIER('test_desc') | SUCCESS | SUCCESS | -| 153 | DROP TABLE test_desc | SUCCESS | SUCCESS | -| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | -| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | SUCCESS | SUCCESS | -| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 157 | DROP TABLE test_comment | SUCCESS | SUCCESS | -| 158 | CREATE SCHEMA test_schema | SUCCESS | SUCCESS | -| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | SUCCESS | SUCCESS | -| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | SUCCESS | SUCCESS | -| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | -| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | -| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | -| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | SUCCESS | SUCCESS | -| 165 | DROP SCHEMA test_schema | SUCCESS | SUCCESS | -| 166 | DECLARE IDENTIFIER('my_var') = 'value' | SUCCESS | SUCCESS | -| 167 | SET 
VAR IDENTIFIER('my_var') = 'new_value' | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 168 | SELECT IDENTIFIER('my_var') | SUCCESS | SUCCESS | -| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | SUCCESS | SUCCESS | -| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 171 | SELECT test_udf(5, 'hello') | **SUCCESS** | **UNRESOLVED_ROUTINE** | -| 172 | DROP TEMPORARY FUNCTION test_udf | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('c... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 174 | SELECT * FROM test_table_udf(42) | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | -| 175 | DROP TEMPORARY FUNCTION test_table_udf | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 177 | LEAVE IDENTIFIER('loop_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 178 | END LOOP loop_label | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 179 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 181 | SELECT x | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 182 | END IDENTIFIER('block_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 183 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('c... 
| PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 186 | END WHILE while_label | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 187 | SELECT IDENTIFIER('counter') | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 188 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 192 | SELECT IDENTIFIER('cnt') | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 193 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 195 | END FOR IDENTIFIER('for_label') | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 196 | END | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | -| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1... | SUCCESS | SUCCESS | -| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | SUCCESS | SUCCESS | -| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | SUCCESS | SUCCESS | -| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, '... | SUCCESS | SUCCESS | -| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_t... | SUCCESS | SUCCESS | -| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS ... 
| SUCCESS | SUCCESS | -| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | SUCCESS | SUCCESS | -| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | SUCCESS | SUCCESS | -| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENT... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY... | SUCCESS | SUCCESS | -| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM inte... | SUCCESS | SUCCESS | -| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:co... | SUCCESS | SUCCESS | -| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:va... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_s... | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_n... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name))... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'te... | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 215 | DROP VIEW test_view | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'int... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(... 
| **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 ... | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 219 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)'... | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **PARSE_SYNTAX_ERROR** | -| 220 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.''... | SUCCESS | SUCCESS | -| 221 | EXECUTE IMMEDIATE 'BEGIN DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 222 | SELECT IDENTIFIER(:var_name) AS result | UNBOUND_SQL_PARAMETER | UNBOUND_SQL_PARAMETER | -| 223 | END' USING 'my_variable' AS var_name, 100 AS var_value | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 224 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_al... | SUCCESS | SUCCESS | -| 225 | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'default' AS schema, 'col1... | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 226 | DROP TABLE integration_test | SUCCESS | SUCCESS | -| 227 | DROP TABLE integration_test2 | SUCCESS | SUCCESS | - ---- - -## Tests Changed from Master - -### 13 Tests with Different Behavior from Master - -| Query# | SQL Text | Master | identifier-clause | Notes | -|--------|----------|--------|-------------------|-------| -| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW w... | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** | Behavior changed | -| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIF... 
| PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | -| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | -| 121 | SELECT * FROM IDENTIFIER('s').tab | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** | Behavior changed | -| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win... | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win')... | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | -| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** | Behavior changed | - ---- - -## Legacy Mode Differences - -### 47 Tests with Different Behavior in Legacy Mode - -| Query# | identifier-clause | identifier-clause-legacy | SQL Text | -|--------|-------------------|--------------------------|----------| -| 114 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW w... | -| 115 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIF... 
| -| 117 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | -| 118 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | -| 119 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM s.IDENTIFIER('tab') | -| 120 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | -| 121 | TABLE_OR_VIEW_NOT_FOUND | PARSE_SYNTAX_ERROR | SELECT * FROM IDENTIFIER('s').tab | -| 122 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win... | -| 123 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win')... | -| 124 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT 1 AS IDENTIFIER('col1') | -| 125 | SUCCESS | PARSE_SYNTAX_ERROR | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1... | -| 126 | SUCCESS | PARSE_SYNTAX_ERROR | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | -| 127 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | -| 128 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c1 FROM v | -| 129 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | -| 130 | SUCCESS | PARSE_SYNTAX_ERROR | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | -| 131 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c1 FROM tab | -| 132 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('c... 
| -| 133 | UNRESOLVED_COLUMN.WITH_SUGGESTION | TABLE_OR_VIEW_NOT_FOUND | SELECT col1 FROM tab | -| 134 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | -| 135 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT c2 FROM tab | -| 136 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | -| 137 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | -| 138 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | SELECT * FROM tab_renamed | -| 139 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | -| 141 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | -| 142 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | SELECT 1 AS IDENTIFIER('col1.col2') | -| 156 | SUCCESS | PARSE_SYNTAX_ERROR | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | -| 167 | SUCCESS | PARSE_SYNTAX_ERROR | SET VAR IDENTIFIER('my_var') = 'new_value' | -| 170 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('para... | -| 171 | SUCCESS | UNRESOLVED_ROUTINE | SELECT test_udf(5, 'hello') | -| 172 | SUCCESS | ROUTINE_NOT_FOUND | DROP TEMPORARY FUNCTION test_udf | -| 173 | SUCCESS | PARSE_SYNTAX_ERROR | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS... | -| 174 | SUCCESS | UNRESOLVABLE_TABLE_VALUED_FUNCTION | SELECT * FROM test_table_udf(42) | -| 175 | SUCCESS | ROUTINE_NOT_FOUND | DROP TEMPORARY FUNCTION test_table_udf | -| 206 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(... | -| 210 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER... 
| -| 211 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1''))... | -| 212 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FR... | -| 213 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(ID... | -| 214 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 't... | -| 215 | SUCCESS | TABLE_OR_VIEW_NOT_FOUND | DROP VIEW test_view | -| 216 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_co... | -| 217 | UNSUPPORTED_FEATURE.TABLE_OPERATION | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old... | -| 218 | SUCCESS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING... | -| 219 | UNRESOLVED_COLUMN.WITH_SUGGESTION | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS... | -| 225 | IDENTIFIER_TOO_MANY_NAME_PARTS | PARSE_SYNTAX_ERROR | EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' USING 'defau... 
| - ---- - -## Summary Statistics - -- **Regression Check**: 13 tests changed from master -- **New Functionality**: 99 new tests added -- **Legacy Compatibility**: 47 tests differ between modes (20.7%) -- **Stability**: 115 existing tests unchanged (50.7%) diff --git a/identifier-clause-comparison.csv b/identifier-clause-comparison.csv deleted file mode 100644 index 89c33a385ad9..000000000000 --- a/identifier-clause-comparison.csv +++ /dev/null @@ -1,274 +0,0 @@ -Query#,SQL Text,Master,identifier-clause,identifier-clause-legacy -1,SET hivevar:colname = 'c',SUCCESS,SUCCESS,SUCCESS -2,SELECT IDENTIFIER(${colname} || '_1') FROM VALUES(1) AS T(c_1),SUCCESS,SUCCESS,SUCCESS -3,SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -4,SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -5,SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -6,SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`),SUCCESS,SUCCESS,SUCCESS -7,SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``),SUCCESS,SUCCESS,SUCCESS -8,SELECT IDENTIFIER('c' || '1') FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -9,CREATE SCHEMA IF NOT EXISTS s,SUCCESS,SUCCESS,SUCCESS -10,CREATE TABLE s.tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -11,USE SCHEMA s,SUCCESS,SUCCESS,SUCCESS -12,INSERT INTO IDENTIFIER('ta' || 'b') VALUES(1),SUCCESS,SUCCESS,SUCCESS -13,DELETE FROM IDENTIFIER('ta' || 'b') WHERE 1=0,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION -14,UPDATE IDENTIFIER('ta' || 'b') SET c1 = 2,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 -15,"MERGE INTO IDENTIFIER('ta' || 'b') AS t USING IDENTIFIER('ta' || 'b') AS s ON s.c1 = t.c1 - WHEN MATCHED THEN UPDATE SET c1 = 3",_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096,_LEGACY_ERROR_TEMP_2096 -16,SELECT * FROM IDENTIFIER('tab'),SUCCESS,SUCCESS,SUCCESS -17,SELECT * FROM IDENTIFIER('s.tab'),SUCCESS,SUCCESS,SUCCESS -18,SELECT * 
FROM IDENTIFIER('`s`.`tab`'),SUCCESS,SUCCESS,SUCCESS -19,SELECT * FROM IDENTIFIER('t' || 'a' || 'b'),SUCCESS,SUCCESS,SUCCESS -20,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS -21,DROP TABLE s.tab,SUCCESS,SUCCESS,SUCCESS -22,DROP SCHEMA s,SUCCESS,SUCCESS,SUCCESS -23,"SELECT IDENTIFIER('COAL' || 'ESCE')(NULL, 1)",SUCCESS,SUCCESS,SUCCESS -24,SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -25,"SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1)",SUCCESS,SUCCESS,SUCCESS -26,CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -27,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -28,CREATE SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS -29,USE identifier_clauses,SUCCESS,SUCCESS,SUCCESS -30,CREATE TABLE IDENTIFIER('ta' || 'b')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -31,DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' || 'tab'),SUCCESS,SUCCESS,SUCCESS -32,CREATE TABLE IDENTIFIER('identifier_clauses.' || 'tab')(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -33,REPLACE TABLE IDENTIFIER('identifier_clauses.' 
|| 'tab')(c1 INT) USING CSV,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION,UNSUPPORTED_FEATURE.TABLE_OPERATION -34,CACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -35,UNCACHE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -36,DROP TABLE IF EXISTS IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -37,USE default,SUCCESS,SUCCESS,SUCCESS -38,DROP SCHEMA identifier_clauses,SUCCESS,SUCCESS,SUCCESS -39,CREATE TABLE tab(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -40,INSERT INTO tab VALUES (1),SUCCESS,SUCCESS,SUCCESS -41,SELECT c1 FROM tab,SUCCESS,SUCCESS,SUCCESS -42,DESCRIBE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -43,ANALYZE TABLE IDENTIFIER('ta' || 'b') COMPUTE STATISTICS,SUCCESS,SUCCESS,SUCCESS -44,ALTER TABLE IDENTIFIER('ta' || 'b') ADD COLUMN c2 INT,SUCCESS,SUCCESS,SUCCESS -45,SHOW TBLPROPERTIES IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -46,SHOW COLUMNS FROM IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -47,COMMENT ON TABLE IDENTIFIER('ta' || 'b') IS 'hello',SUCCESS,SUCCESS,SUCCESS -48,REFRESH TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -49,REPAIR TABLE IDENTIFIER('ta' || 'b'),NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE,NOT_A_PARTITIONED_TABLE -50,TRUNCATE TABLE IDENTIFIER('ta' || 'b'),SUCCESS,SUCCESS,SUCCESS -51,DROP TABLE IF EXISTS tab,SUCCESS,SUCCESS,SUCCESS -52,CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS -53,SELECT * FROM v,SUCCESS,SUCCESS,SUCCESS -54,ALTER VIEW IDENTIFIER('v') AS VALUES(2),SUCCESS,SUCCESS,SUCCESS -55,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS -56,CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1),SUCCESS,SUCCESS,SUCCESS -57,DROP VIEW IDENTIFIER('v'),SUCCESS,SUCCESS,SUCCESS -58,CREATE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -59,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET PROPERTIES (somekey = 'somevalue'),SUCCESS,SUCCESS,SUCCESS -60,ALTER SCHEMA IDENTIFIER('id' || 'ent') SET LOCATION 
'someloc',SUCCESS,SUCCESS,SUCCESS -61,COMMENT ON SCHEMA IDENTIFIER('id' || 'ent') IS 'some comment',SUCCESS,SUCCESS,SUCCESS -62,DESCRIBE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -63,SHOW TABLES IN IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -64,SHOW TABLE EXTENDED IN IDENTIFIER('id' || 'ent') LIKE 'hello',SUCCESS,SUCCESS,SUCCESS -65,USE IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -66,SHOW CURRENT SCHEMA,SUCCESS,SUCCESS,SUCCESS -67,USE SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -68,USE SCHEMA default,SUCCESS,SUCCESS,SUCCESS -69,DROP SCHEMA IDENTIFIER('id' || 'ent'),SUCCESS,SUCCESS,SUCCESS -70,CREATE SCHEMA ident,SUCCESS,SUCCESS,SUCCESS -71,CREATE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS -72,DESCRIBE FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -73,REFRESH FUNCTION IDENTIFIER('ident.' || 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -74,DROP FUNCTION IDENTIFIER('ident.' 
|| 'myDoubleAvg'),SUCCESS,SUCCESS,SUCCESS -75,DROP SCHEMA ident,SUCCESS,SUCCESS,SUCCESS -76,CREATE TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',SUCCESS,SUCCESS,SUCCESS -77,DROP TEMPORARY FUNCTION IDENTIFIER('my' || 'DoubleAvg'),SUCCESS,SUCCESS,SUCCESS -78,DECLARE var = 'sometable',SUCCESS,SUCCESS,SUCCESS -79,CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV,SUCCESS,SUCCESS,SUCCESS -80,SET VAR var = 'c1',SUCCESS,SUCCESS,SUCCESS -81,SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1),SUCCESS,SUCCESS,SUCCESS -82,SET VAR var = 'some',SUCCESS,SUCCESS,SUCCESS -83,DROP TABLE IDENTIFIER(var || 'table'),SUCCESS,SUCCESS,SUCCESS -84,SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`),PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -85,SELECT IDENTIFIER('') FROM VALUES(1) AS T(``),PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT,PARSE_EMPTY_STATEMENT -86,VALUES(IDENTIFIER(CAST(NULL AS STRING))),NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL,NOT_A_CONSTANT_STRING.NULL -87,VALUES(IDENTIFIER(1)),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -88,"VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1)))",NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT,NOT_A_CONSTANT_STRING.NOT_CONSTANT -89,SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1),UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE,UNRESOLVED_ROUTINE -90,CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -91,CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -92,CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -93,DROP TABLE IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE 
-94,DROP VIEW IDENTIFIER('a.b.c'),REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -95,COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello',REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE,REQUIRES_SINGLE_PART_NAMESPACE -96,VALUES(IDENTIFIER(1)()),NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE,NOT_A_CONSTANT_STRING.WRONG_TYPE -97,VALUES(IDENTIFIER('a.b.c.d')()),IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS,IDENTIFIER_TOO_MANY_NAME_PARTS -98,CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg',INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE,INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE -99,DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg'),INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME,INVALID_SQL_SYNTAX.MULTI_PART_NAME -100,CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1),TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS,TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS -101,"create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -102,"cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -103,"create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1)",SUCCESS,SUCCESS,SUCCESS -104,insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1,SUCCESS,SUCCESS,SUCCESS -105,drop view v1,SUCCESS,SUCCESS,SUCCESS -106,drop table t1,SUCCESS,SUCCESS,SUCCESS -107,drop table t2,SUCCESS,SUCCESS,SUCCESS -108,DECLARE agg = 'max',SUCCESS,SUCCESS,SUCCESS -109,DECLARE col = 'c1',SUCCESS,SUCCESS,SUCCESS -110,DECLARE tab = 'T',SUCCESS,SUCCESS,SUCCESS -111,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), - T(c1, c2) AS 
(VALUES ('a', 'b'), ('c', 'd')) -SELECT IDENTIFIER(agg)(IDENTIFIER(col)) FROM IDENTIFIER(tab)",SUCCESS,SUCCESS,SUCCESS -112,"WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), - T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) -SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('T')",SUCCESS,SUCCESS,SUCCESS -113,"WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) -SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' || 'BC')",SUCCESS,SUCCESS,SUCCESS -114,SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -115,SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -116,SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1),UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION,UNRESOLVED_COLUMN.WITH_SUGGESTION -117,"SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -118,"SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)",PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -119,SELECT * FROM s.IDENTIFIER('tab'),INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -120,SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab'),PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -121,SELECT * FROM IDENTIFIER('s').tab,PARSE_SYNTAX_ERROR,TABLE_OR_VIEW_NOT_FOUND,PARSE_SYNTAX_ERROR -122,SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -123,SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -124,SELECT 1 AS IDENTIFIER('col1'),NEW,SUCCESS,PARSE_SYNTAX_ERROR -125,"SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR 
-126,WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -127,CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -128,SELECT c1 FROM v,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -129,CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV,PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -130,INSERT INTO tab(IDENTIFIER('c1')) VALUES(1),PARSE_SYNTAX_ERROR,SUCCESS,PARSE_SYNTAX_ERROR -131,SELECT c1 FROM tab,SUCCESS,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -132,ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -133,SELECT col1 FROM tab,NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,TABLE_OR_VIEW_NOT_FOUND -134,ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT,NEW,SUCCESS,PARSE_SYNTAX_ERROR -135,SELECT c2 FROM tab,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -136,ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2'),NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -137,ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed'),NEW,SUCCESS,PARSE_SYNTAX_ERROR -138,SELECT * FROM tab_renamed,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -139,CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV,NEW,SUCCESS,PARSE_SYNTAX_ERROR -140,DROP TABLE IF EXISTS test_col_with_dot,NEW,SUCCESS,SUCCESS -141,"SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -142,SELECT 1 AS IDENTIFIER('col1.col2'),NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -143,"CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -144,SHOW VIEWS IN IDENTIFIER('default'),NEW,SUCCESS,SUCCESS -145,SHOW PARTITIONS IDENTIFIER('test_show'),NEW,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY,INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY -146,SHOW CREATE TABLE IDENTIFIER('test_show'),NEW,SUCCESS,SUCCESS 
-147,DROP TABLE test_show,NEW,SUCCESS,SUCCESS -148,CREATE TABLE test_desc(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS -149,DESCRIBE TABLE IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -150,DESCRIBE FORMATTED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -151,DESCRIBE EXTENDED IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -152,DESC IDENTIFIER('test_desc'),NEW,SUCCESS,SUCCESS -153,DROP TABLE test_desc,NEW,SUCCESS,SUCCESS -154,"CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -155,COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment',NEW,SUCCESS,SUCCESS -156,ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment',NEW,SUCCESS,PARSE_SYNTAX_ERROR -157,DROP TABLE test_comment,NEW,SUCCESS,SUCCESS -158,CREATE SCHEMA test_schema,NEW,SUCCESS,SUCCESS -159,CREATE TABLE test_schema.test_table(c1 INT) USING CSV,NEW,SUCCESS,SUCCESS -160,ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS,NEW,SUCCESS,SUCCESS -161,REFRESH TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -162,DESCRIBE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -163,SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -164,DROP TABLE IDENTIFIER('test_schema.test_table'),NEW,SUCCESS,SUCCESS -165,DROP SCHEMA test_schema,NEW,SUCCESS,SUCCESS -166,DECLARE IDENTIFIER('my_var') = 'value',NEW,SUCCESS,SUCCESS -167,SET VAR IDENTIFIER('my_var') = 'new_value',NEW,SUCCESS,PARSE_SYNTAX_ERROR -168,SELECT IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS -169,DROP TEMPORARY VARIABLE IDENTIFIER('my_var'),NEW,SUCCESS,SUCCESS -170,"CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) -RETURNS INT -RETURN IDENTIFIER('param1') + length(IDENTIFIER('param2'))",NEW,SUCCESS,PARSE_SYNTAX_ERROR -171,"SELECT test_udf(5, 'hello')",NEW,SUCCESS,UNRESOLVED_ROUTINE -172,DROP TEMPORARY FUNCTION test_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND -173,"CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) 
-RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIER('col2') STRING) -RETURN SELECT IDENTIFIER('input_val'), 'result'",NEW,SUCCESS,PARSE_SYNTAX_ERROR -174,SELECT * FROM test_table_udf(42),NEW,SUCCESS,UNRESOLVABLE_TABLE_VALUED_FUNCTION -175,DROP TEMPORARY FUNCTION test_table_udf,NEW,SUCCESS,ROUTINE_NOT_FOUND -176,"BEGIN - IDENTIFIER('loop_label'): LOOP - SELECT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -177,LEAVE IDENTIFIER('loop_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -178,END LOOP loop_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -179,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -180,"BEGIN - block_label: BEGIN - DECLARE IDENTIFIER('x') INT DEFAULT 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -181,SELECT x,NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -182,END IDENTIFIER('block_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -183,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -184,"BEGIN - DECLARE IDENTIFIER('counter') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -185,"IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO - SET VAR counter = IDENTIFIER('counter') + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -186,END WHILE while_label,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -187,SELECT IDENTIFIER('counter'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -188,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -189,"BEGIN - DECLARE IDENTIFIER('cnt') INT DEFAULT 0",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -190,"repeat_label: REPEAT - SET VAR IDENTIFIER('cnt') = cnt + 1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -191,"UNTIL IDENTIFIER('cnt') >= 2 - END REPEAT IDENTIFIER('repeat_label')",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -192,SELECT IDENTIFIER('cnt'),NEW,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION,UNRESOLVED_COLUMN.WITHOUT_SUGGESTION -193,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -194,"BEGIN - IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO - SELECT 
row.c1",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -195,END FOR IDENTIFIER('for_label'),NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -196,END,NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -197,EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab,NEW,INVALID_EXTRACT_BASE_FIELD_TYPE,INVALID_EXTRACT_BASE_FIELD_TYPE -198,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' - USING 'c1' AS col1",NEW,SUCCESS,SUCCESS -199,"CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV",NEW,SUCCESS,SUCCESS -200,"INSERT INTO integration_test VALUES (1, 'a'), (2, 'b')",NEW,SUCCESS,SUCCESS -201,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table",NEW,SUCCESS,SUCCESS -202,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' - USING 'c' AS prefix",NEW,SUCCESS,SUCCESS -203,"EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' - USING 'c1' AS col, 1 AS val",NEW,SUCCESS,SUCCESS -204,"CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV",NEW,SUCCESS,SUCCESS -205,"INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y')",NEW,SUCCESS,SUCCESS -206,"EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' - USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col",NEW,SUCCESS,PARSE_SYNTAX_ERROR -207,"EXECUTE IMMEDIATE - 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) ORDER BY IDENTIFIER(:ord)) as rn FROM integration_test' - USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord",NEW,SUCCESS,SUCCESS -208,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' - USING 'c' AS prefix, 'count' AS agg, 'c1' AS col",NEW,SUCCESS,SUCCESS -209,"EXECUTE 
IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' - USING 'c1' AS col1, 'c2' AS col2",NEW,SUCCESS,SUCCESS -210,"EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' - USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2",NEW,SUCCESS,PARSE_SYNTAX_ERROR -211,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -212,"EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' - USING 'my_cte' AS cte_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR -213,"EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' - USING 'test_view' AS view_name, 'test_col' AS col_name",NEW,SUCCESS,PARSE_SYNTAX_ERROR -214,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' - USING 'test_col' AS col, 'test_view' AS view",NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -215,DROP VIEW test_view,NEW,SUCCESS,TABLE_OR_VIEW_NOT_FOUND -216,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' - USING 'integration_test' AS tab, 'c4' AS new_col",NEW,SUCCESS,PARSE_SYNTAX_ERROR -217,"EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' - USING 'integration_test' AS tab, 'c4' AS old_col, 'c5' AS new_col",NEW,UNSUPPORTED_FEATURE.TABLE_OPERATION,PARSE_SYNTAX_ERROR -218,"EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' - USING 'mykey' AS key, 42 AS val",NEW,SUCCESS,PARSE_SYNTAX_ERROR -219,"EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' - USING 't' AS alias",NEW,UNRESOLVED_COLUMN.WITH_SUGGESTION,PARSE_SYNTAX_ERROR 
-220,"EXECUTE IMMEDIATE - 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab",NEW,SUCCESS,SUCCESS -221,"EXECUTE IMMEDIATE - 'BEGIN - DECLARE IDENTIFIER(:var_name) INT DEFAULT :var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -222,SELECT IDENTIFIER(:var_name) AS result,NEW,UNBOUND_SQL_PARAMETER,UNBOUND_SQL_PARAMETER -223,"END' - USING 'my_variable' AS var_name, 100 AS var_value",NEW,PARSE_SYNTAX_ERROR,PARSE_SYNTAX_ERROR -224,"EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias",NEW,SUCCESS,SUCCESS -225,"EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col",NEW,IDENTIFIER_TOO_MANY_NAME_PARTS,PARSE_SYNTAX_ERROR -226,DROP TABLE integration_test,NEW,SUCCESS,SUCCESS -227,DROP TABLE integration_test2,NEW,SUCCESS,SUCCESS diff --git a/identifier-clause-comparison.md b/identifier-clause-comparison.md deleted file mode 100644 index 0011e5e0dd1d..000000000000 --- a/identifier-clause-comparison.md +++ /dev/null @@ -1,248 +0,0 @@ -# IDENTIFIER Clause Test Comparison - -Comprehensive comparison of IDENTIFIER clause behavior across different modes. 
- -- **Total Tests**: 227 -- **Tests from Master**: 128 -- **New Tests Added**: 99 -- **Tests Changed from Master**: 13 -- **Tests with Legacy Mode Differences**: 47 - -## Legend - -- **Query#**: Sequential test number -- **SQL Text**: The SQL query being tested -- **Master**: Result from master branch (before identifier-lite changes) -- **identifier-clause**: Result with identifier-lite enabled (default mode, current) -- **identifier-clause-legacy**: Result with `spark.sql.legacy.identifierClause=true` - -### Result Values - -- `SUCCESS`: Query executed successfully -- `<ERROR_CLASS>`: Query failed with the specified error class -- `NEW`: Test did not exist in master baseline - ---- - -## Full Test Results - -| Query# | SQL Text | Master | identifier-clause | identifier-clause-legacy | -|--------|----------|--------|-------------------|--------------------------| -| 1 | SET hivevar:colname = 'c' | SUCCESS | SUCCESS | SUCCESS | -| 2 | SELECT IDENTIFIER(${colname} \|\| '_1') FROM VALUES(1) AS T(c_1) | SUCCESS | SUCCESS | SUCCESS | -| 3 | SELECT IDENTIFIER('c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 4 | SELECT IDENTIFIER('t.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 5 | SELECT IDENTIFIER('`t`.c1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 6 | SELECT IDENTIFIER('`c 1`') FROM VALUES(1) AS T(`c 1`) | SUCCESS | SUCCESS | SUCCESS | -| 7 | SELECT IDENTIFIER('``') FROM VALUES(1) AS T(``) | SUCCESS | SUCCESS | SUCCESS | -| 8 | SELECT IDENTIFIER('c' \|\| '1') FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 9 | CREATE SCHEMA IF NOT EXISTS s | SUCCESS | SUCCESS | SUCCESS | -| 10 | CREATE TABLE s.tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 11 | USE SCHEMA s | SUCCESS | SUCCESS | SUCCESS | -| 12 | INSERT INTO IDENTIFIER('ta' \|\| 'b') VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 13 | DELETE FROM IDENTIFIER('ta' \|\| 'b') WHERE 1=0 | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | 
UNSUPPORTED_FEATURE.TABLE_OPERATION | -| 14 | UPDATE IDENTIFIER('ta' \|\| 'b') SET c1 = 2 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | -| 15 | MERGE INTO IDENTIFIER('ta' \|\| 'b') AS t USING IDENTIFIER('ta' \|\| 'b') AS s ON s.c1 = t.c1 WHEN MATCHED THEN UPD... | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | _LEGACY_ERROR_TEMP_2096 | -| 16 | SELECT * FROM IDENTIFIER('tab') | SUCCESS | SUCCESS | SUCCESS | -| 17 | SELECT * FROM IDENTIFIER('s.tab') | SUCCESS | SUCCESS | SUCCESS | -| 18 | SELECT * FROM IDENTIFIER('`s`.`tab`') | SUCCESS | SUCCESS | SUCCESS | -| 19 | SELECT * FROM IDENTIFIER('t' \|\| 'a' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 20 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | -| 21 | DROP TABLE s.tab | SUCCESS | SUCCESS | SUCCESS | -| 22 | DROP SCHEMA s | SUCCESS | SUCCESS | SUCCESS | -| 23 | SELECT IDENTIFIER('COAL' \|\| 'ESCE')(NULL, 1) | SUCCESS | SUCCESS | SUCCESS | -| 24 | SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 25 | SELECT * FROM IDENTIFIER('ra' \|\| 'nge')(0, 1) | SUCCESS | SUCCESS | SUCCESS | -| 26 | CREATE TABLE IDENTIFIER('tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 27 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 28 | CREATE SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 29 | USE identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 30 | CREATE TABLE IDENTIFIER('ta' \|\| 'b')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 31 | DROP TABLE IF EXISTS IDENTIFIER('identifier_clauses.' \|\| 'tab') | SUCCESS | SUCCESS | SUCCESS | -| 32 | CREATE TABLE IDENTIFIER('identifier_clauses.' \|\| 'tab')(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 33 | REPLACE TABLE IDENTIFIER('identifier_clauses.' 
\|\| 'tab')(c1 INT) USING CSV | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | UNSUPPORTED_FEATURE.TABLE_OPERATION | -| 34 | CACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 35 | UNCACHE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 36 | DROP TABLE IF EXISTS IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 37 | USE default | SUCCESS | SUCCESS | SUCCESS | -| 38 | DROP SCHEMA identifier_clauses | SUCCESS | SUCCESS | SUCCESS | -| 39 | CREATE TABLE tab(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 40 | INSERT INTO tab VALUES (1) | SUCCESS | SUCCESS | SUCCESS | -| 41 | SELECT c1 FROM tab | SUCCESS | SUCCESS | SUCCESS | -| 42 | DESCRIBE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 43 | ANALYZE TABLE IDENTIFIER('ta' \|\| 'b') COMPUTE STATISTICS | SUCCESS | SUCCESS | SUCCESS | -| 44 | ALTER TABLE IDENTIFIER('ta' \|\| 'b') ADD COLUMN c2 INT | SUCCESS | SUCCESS | SUCCESS | -| 45 | SHOW TBLPROPERTIES IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 46 | SHOW COLUMNS FROM IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 47 | COMMENT ON TABLE IDENTIFIER('ta' \|\| 'b') IS 'hello' | SUCCESS | SUCCESS | SUCCESS | -| 48 | REFRESH TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 49 | REPAIR TABLE IDENTIFIER('ta' \|\| 'b') | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | NOT_A_PARTITIONED_TABLE | -| 50 | TRUNCATE TABLE IDENTIFIER('ta' \|\| 'b') | SUCCESS | SUCCESS | SUCCESS | -| 51 | DROP TABLE IF EXISTS tab | SUCCESS | SUCCESS | SUCCESS | -| 52 | CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 53 | SELECT * FROM v | SUCCESS | SUCCESS | SUCCESS | -| 54 | ALTER VIEW IDENTIFIER('v') AS VALUES(2) | SUCCESS | SUCCESS | SUCCESS | -| 55 | DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | -| 56 | CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) | SUCCESS | SUCCESS | SUCCESS | -| 57 
| DROP VIEW IDENTIFIER('v') | SUCCESS | SUCCESS | SUCCESS | -| 58 | CREATE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 59 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET PROPERTIES (somekey = 'somevalue') | SUCCESS | SUCCESS | SUCCESS | -| 60 | ALTER SCHEMA IDENTIFIER('id' \|\| 'ent') SET LOCATION 'someloc' | SUCCESS | SUCCESS | SUCCESS | -| 61 | COMMENT ON SCHEMA IDENTIFIER('id' \|\| 'ent') IS 'some comment' | SUCCESS | SUCCESS | SUCCESS | -| 62 | DESCRIBE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 63 | SHOW TABLES IN IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 64 | SHOW TABLE EXTENDED IN IDENTIFIER('id' \|\| 'ent') LIKE 'hello' | SUCCESS | SUCCESS | SUCCESS | -| 65 | USE IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 66 | SHOW CURRENT SCHEMA | SUCCESS | SUCCESS | SUCCESS | -| 67 | USE SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 68 | USE SCHEMA default | SUCCESS | SUCCESS | SUCCESS | -| 69 | DROP SCHEMA IDENTIFIER('id' \|\| 'ent') | SUCCESS | SUCCESS | SUCCESS | -| 70 | CREATE SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | -| 71 | CREATE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | -| 72 | DESCRIBE FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 73 | REFRESH FUNCTION IDENTIFIER('ident.' \|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 74 | DROP FUNCTION IDENTIFIER('ident.' 
\|\| 'myDoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 75 | DROP SCHEMA ident | SUCCESS | SUCCESS | SUCCESS | -| 76 | CREATE TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | SUCCESS | SUCCESS | SUCCESS | -| 77 | DROP TEMPORARY FUNCTION IDENTIFIER('my' \|\| 'DoubleAvg') | SUCCESS | SUCCESS | SUCCESS | -| 78 | DECLARE var = 'sometable' | SUCCESS | SUCCESS | SUCCESS | -| 79 | CREATE TABLE IDENTIFIER(var)(c1 INT) USING CSV | SUCCESS | SUCCESS | SUCCESS | -| 80 | SET VAR var = 'c1' | SUCCESS | SUCCESS | SUCCESS | -| 81 | SELECT IDENTIFIER(var) FROM VALUES(1) AS T(c1) | SUCCESS | SUCCESS | SUCCESS | -| 82 | SET VAR var = 'some' | SUCCESS | SUCCESS | SUCCESS | -| 83 | DROP TABLE IDENTIFIER(var \|\| 'table') | SUCCESS | SUCCESS | SUCCESS | -| 84 | SELECT IDENTIFIER('c 1') FROM VALUES(1) AS T(`c 1`) | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 85 | SELECT IDENTIFIER('') FROM VALUES(1) AS T(``) | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | PARSE_EMPTY_STATEMENT | -| 86 | VALUES(IDENTIFIER(CAST(NULL AS STRING))) | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | NOT_A_CONSTANT_STRING.NULL | -| 87 | VALUES(IDENTIFIER(1)) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 88 | VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))) | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | NOT_A_CONSTANT_STRING.NOT_CONSTANT | -| 89 | SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | UNRESOLVED_ROUTINE | -| 90 | CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 91 | CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 92 | CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1) | 
REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 93 | DROP TABLE IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 94 | DROP VIEW IDENTIFIER('a.b.c') | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 95 | COMMENT ON TABLE IDENTIFIER('a.b.c.d') IS 'hello' | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | REQUIRES_SINGLE_PART_NAMESPACE | -| 96 | VALUES(IDENTIFIER(1)()) | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | NOT_A_CONSTANT_STRING.WRONG_TYPE | -| 97 | VALUES(IDENTIFIER('a.b.c.d')()) | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | IDENTIFIER_TOO_MANY_NAME_PARTS | -| 98 | CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') AS 'test.org.apache.spark.sql.MyDoubleAvg' | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE | -| 99 | DROP TEMPORARY FUNCTION IDENTIFIER('default.my' \|\| 'DoubleAvg') | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | INVALID_SQL_SYNTAX.MULTI_PART_NAME | -| 100 | CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1) | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS | -| 101 | create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 102 | cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 103 | create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) | SUCCESS | SUCCESS | SUCCESS | -| 104 | insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 
| SUCCESS | SUCCESS | SUCCESS | -| 105 | drop view v1 | SUCCESS | SUCCESS | SUCCESS | -| 106 | drop table t1 | SUCCESS | SUCCESS | SUCCESS | -| 107 | drop table t2 | SUCCESS | SUCCESS | SUCCESS | -| 108 | DECLARE agg = 'max' | SUCCESS | SUCCESS | SUCCESS | -| 109 | DECLARE col = 'c1' | SUCCESS | SUCCESS | SUCCESS | -| 110 | DECLARE tab = 'T' | SUCCESS | SUCCESS | SUCCESS | -| 111 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER(agg)(ID... | SUCCESS | SUCCESS | SUCCESS | -| 112 | WITH S(c1, c2) AS (VALUES(1, 2), (2, 3)), T(c1, c2) AS (VALUES ('a', 'b'), ('c', 'd')) SELECT IDENTIFIER('max')(... | SUCCESS | SUCCESS | SUCCESS | -| 113 | WITH ABC(c1, c2) AS (VALUES(1, 2), (2, 3)) SELECT IDENTIFIER('max')(IDENTIFIER('c1')) FROM IDENTIFIER('A' \|\| 'BC') | SUCCESS | SUCCESS | SUCCESS | -| 114 | SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **IDENTIFIER_TOO_MANY_NAME_PARTS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 115 | SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 116 | SELECT IDENTIFIER('t').c1 FROM VALUES(1) AS T(c1) | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | UNRESOLVED_COLUMN.WITH_SUGGESTION | -| 117 | SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 118 | SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 119 | SELECT * FROM s.IDENTIFIER('tab') | INVALID_SQL_SYNTAX.INVALID_TABLE_VALUED_FUNC_NAME | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 120 | SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') | PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 121 | SELECT * FROM IDENTIFIER('s').tab | 
PARSE_SYNTAX_ERROR | **TABLE_OR_VIEW_NOT_FOUND** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 122 | SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 123 | SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 124 | SELECT 1 AS IDENTIFIER('col1') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 125 | SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 126 | WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 127 | CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 128 | SELECT c1 FROM v | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 129 | CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 130 | INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) | PARSE_SYNTAX_ERROR | **SUCCESS** ⚠️ | **PARSE_SYNTAX_ERROR** | -| 131 | SELECT c1 FROM tab | SUCCESS | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 132 | ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 133 | SELECT col1 FROM tab | **NEW** | **UNRESOLVED_COLUMN.WITH_SUGGESTION** | **TABLE_OR_VIEW_NOT_FOUND** | -| 134 | ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 135 | SELECT c2 FROM tab | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 136 | ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') | **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 137 | 
ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 138 | SELECT * FROM tab_renamed | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 139 | CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 140 | DROP TABLE IF EXISTS test_col_with_dot | **NEW** | SUCCESS | SUCCESS | -| 141 | SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 142 | SELECT 1 AS IDENTIFIER('col1.col2') | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 143 | CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 144 | SHOW VIEWS IN IDENTIFIER('default') | **NEW** | SUCCESS | SUCCESS | -| 145 | SHOW PARTITIONS IDENTIFIER('test_show') | **NEW** | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY | -| 146 | SHOW CREATE TABLE IDENTIFIER('test_show') | **NEW** | SUCCESS | SUCCESS | -| 147 | DROP TABLE test_show | **NEW** | SUCCESS | SUCCESS | -| 148 | CREATE TABLE test_desc(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 149 | DESCRIBE TABLE IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 150 | DESCRIBE FORMATTED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 151 | DESCRIBE EXTENDED IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 152 | DESC IDENTIFIER('test_desc') | **NEW** | SUCCESS | SUCCESS | -| 153 | DROP TABLE test_desc | **NEW** | SUCCESS | SUCCESS | -| 154 | CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 155 | COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' | **NEW** | SUCCESS | SUCCESS | -| 156 | ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 157 | DROP TABLE 
test_comment | **NEW** | SUCCESS | SUCCESS | -| 158 | CREATE SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | -| 159 | CREATE TABLE test_schema.test_table(c1 INT) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 160 | ANALYZE TABLE IDENTIFIER('test_schema.test_table') COMPUTE STATISTICS | **NEW** | SUCCESS | SUCCESS | -| 161 | REFRESH TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 162 | DESCRIBE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 163 | SHOW COLUMNS FROM IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 164 | DROP TABLE IDENTIFIER('test_schema.test_table') | **NEW** | SUCCESS | SUCCESS | -| 165 | DROP SCHEMA test_schema | **NEW** | SUCCESS | SUCCESS | -| 166 | DECLARE IDENTIFIER('my_var') = 'value' | **NEW** | SUCCESS | SUCCESS | -| 167 | SET VAR IDENTIFIER('my_var') = 'new_value' | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 168 | SELECT IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | -| 169 | DROP TEMPORARY VARIABLE IDENTIFIER('my_var') | **NEW** | SUCCESS | SUCCESS | -| 170 | CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) RETURNS INT RETURN IDENTIFI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 171 | SELECT test_udf(5, 'hello') | **NEW** | **SUCCESS** | **UNRESOLVED_ROUTINE** | -| 172 | DROP TEMPORARY FUNCTION test_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 173 | CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) RETURNS TABLE(IDENTIFIER('col1') INT, IDENTIFIE... 
| **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 174 | SELECT * FROM test_table_udf(42) | **NEW** | **SUCCESS** | **UNRESOLVABLE_TABLE_VALUED_FUNCTION** | -| 175 | DROP TEMPORARY FUNCTION test_table_udf | **NEW** | **SUCCESS** | **ROUTINE_NOT_FOUND** | -| 176 | BEGIN IDENTIFIER('loop_label'): LOOP SELECT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 177 | LEAVE IDENTIFIER('loop_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 178 | END LOOP loop_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 179 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 180 | BEGIN block_label: BEGIN DECLARE IDENTIFIER('x') INT DEFAULT 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 181 | SELECT x | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 182 | END IDENTIFIER('block_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 183 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 184 | BEGIN DECLARE IDENTIFIER('counter') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 185 | IDENTIFIER('while_label'): WHILE IDENTIFIER('counter') < 3 DO SET VAR counter = IDENTIFIER('counter') + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 186 | END WHILE while_label | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 187 | SELECT IDENTIFIER('counter') | **NEW** | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 188 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 189 | BEGIN DECLARE IDENTIFIER('cnt') INT DEFAULT 0 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 190 | repeat_label: REPEAT SET VAR IDENTIFIER('cnt') = cnt + 1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 191 | UNTIL IDENTIFIER('cnt') >= 2 END REPEAT IDENTIFIER('repeat_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 192 | SELECT IDENTIFIER('cnt') | **NEW** | 
UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | UNRESOLVED_COLUMN.WITHOUT_SUGGESTION | -| 193 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 194 | BEGIN IDENTIFIER('for_label'): FOR IDENTIFIER('row') AS SELECT 1 AS c1 DO SELECT row.c1 | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 195 | END FOR IDENTIFIER('for_label') | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 196 | END | **NEW** | PARSE_SYNTAX_ERROR | PARSE_SYNTAX_ERROR | -| 197 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:tab \'b\').c1 FROM VALUES(1) AS tab(c1)' USING 'ta' AS tab | **NEW** | INVALID_EXTRACT_BASE_FIELD_TYPE | INVALID_EXTRACT_BASE_FIELD_TYPE | -| 198 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c2'', 42)) AS T(c1)' USING 'c1' AS col1 | **NEW** | SUCCESS | SUCCESS | -| 199 | CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 200 | INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') | **NEW** | SUCCESS | SUCCESS | -| 201 | EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' USING 'default' AS schema, 'integration_test' AS... | **NEW** | SUCCESS | SUCCESS | -| 202 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' USING 'c' AS ... | **NEW** | SUCCESS | SUCCESS | -| 203 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' USING 'c1' AS col, 1 AS val | **NEW** | SUCCESS | SUCCESS | -| 204 | CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV | **NEW** | SUCCESS | SUCCESS | -| 205 | INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') | **NEW** | SUCCESS | SUCCESS | -| 206 | EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' USIN... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 207 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:col2), row_number() OVER (PARTITION BY IDENTIFIER(:part) O... 
| **NEW** | SUCCESS | SUCCESS | -| 208 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP B... | **NEW** | SUCCESS | SUCCESS | -| 209 | EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DESC, IDENTIFIER(:col2)' USING 'c1' AS... | **NEW** | SUCCESS | SUCCESS | -| 210 | EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' USING ... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 211 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) ... | **NEW** | **IDENTIFIER_TOO_MANY_NAME_PARTS** | **PARSE_SYNTAX_ERROR** | -| 212 | EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' USING 'my_ct... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 213 | EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS VALUES(1)' USI... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 214 | EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' USING 'test_col' AS col, 'test_view' AS view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 215 | DROP VIEW test_view | **NEW** | **SUCCESS** | **TABLE_OR_VIEW_NOT_FOUND** | -| 216 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' USING 'integration_test' AS ta... | **NEW** | **SUCCESS** | **PARSE_SYNTAX_ERROR** | -| 217 | EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:new_col)' USING '... 
| **NEW** | **UNSUPPORTED_FEATURE.TABLE_OPERATION** | **PARSE_SYNTAX_ERROR** | -| 218 | EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 AS val | ** \ No newline at end of file diff --git a/identifier-clause-differences.txt b/identifier-clause-differences.txt deleted file mode 100644 index ffa9dffcac89..000000000000 --- a/identifier-clause-differences.txt +++ /dev/null @@ -1,364 +0,0 @@ -======================================================================================================================== -IDENTIFIER CLAUSE: DETAILED COMPARISON OF DIFFERENCES -======================================================================================================================== - -Total tests: 227 -Tests with different behavior: 47 -Tests with identical behavior: 180 - -======================================================================================================================== -TESTS WITH DIFFERENT BEHAVIOR -======================================================================================================================== - -Query #114: - SQL: SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) - Master: NEW - identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #115: - SQL: SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #117: - SQL: SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #118: - SQL: SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #119: - SQL: SELECT * FROM s.IDENTIFIER('tab') - Master: NEW - identifier-clause: 
TABLE_OR_VIEW_NOT_FOUND - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #120: - SQL: SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab') - Master: NEW - identifier-clause: TABLE_OR_VIEW_NOT_FOUND - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #121: - SQL: SELECT * FROM IDENTIFIER('s').tab - Master: NEW - identifier-clause: TABLE_OR_VIEW_NOT_FOUND - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #122: - SQL: SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #123: - SQL: SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER('win') AS (ORDER BY c1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #124: - SQL: SELECT 1 AS IDENTIFIER('col1') - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #125: - SQL: SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIFIER('c1'), IDENTIFIER('c2')) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #126: - SQL: WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM v) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #127: - SQL: CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #128: - SQL: SELECT c1 FROM v - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #129: - SQL: CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #130: - SQL: INSERT INTO tab(IDENTIFIER('c1')) VALUES(1) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #131: - 
SQL: SELECT c1 FROM tab - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #132: - SQL: ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTIFIER('col1') - Master: NEW - identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #133: - SQL: SELECT col1 FROM tab - Master: NEW - identifier-clause: UNRESOLVED_COLUMN.WITH_SUGGESTION - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #134: - SQL: ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #135: - SQL: SELECT c2 FROM tab - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #136: - SQL: ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2') - Master: NEW - identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #137: - SQL: ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed') - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #138: - SQL: SELECT * FROM tab_renamed - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #139: - SQL: CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING CSV - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #141: - SQL: SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2) - Master: NEW - identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #142: - SQL: SELECT 1 AS IDENTIFIER('col1.col2') - Master: NEW - identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #156: - SQL: ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' - 
Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #167: - SQL: SET VAR IDENTIFIER('my_var') = 'new_value' - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #170: - SQL: CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIER('param2') STRING) -RETURNS IN... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #171: - SQL: SELECT test_udf(5, 'hello') - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: UNRESOLVED_ROUTINE - -Query #172: - SQL: DROP TEMPORARY FUNCTION test_udf - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: ROUTINE_NOT_FOUND - -Query #173: - SQL: CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) -RETURNS TABLE(IDENTIFIER('col1... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #174: - SQL: SELECT * FROM test_table_udf(42) - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: UNRESOLVABLE_TABLE_VALUED_FUNCTION - -Query #175: - SQL: DROP TEMPORARY FUNCTION test_table_udf - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: ROUTINE_NOT_FOUND - -Query #206: - SQL: EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFI... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #210: - SQL: EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1,... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #211: - SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_stru... 
- Master: NEW - identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #212: - SQL: EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #213: - SQL: EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_name)(IDENTIFIER(:col_name)) AS... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #214: - SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' - USING 'test_col' AS col, 'test_... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #215: - SQL: DROP VIEW test_view - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: TABLE_OR_VIEW_NOT_FOUND - -Query #216: - SQL: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) INT' - USING 'integr... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #217: - SQL: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFIER(:old_col) TO IDENTIFIER(:ne... - Master: NEW - identifier-clause: UNSUPPORTED_FEATURE.TABLE_OPERATION - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #218: - SQL: EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' - USING 'mykey' AS key, 42 AS ... - Master: NEW - identifier-clause: SUCCESS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #219: - SQL: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' - ... - Master: NEW - identifier-clause: UNRESOLVED_COLUMN.WITH_SUGGESTION - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -Query #225: - SQL: EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' A... 
- Master: NEW - identifier-clause: IDENTIFIER_TOO_MANY_NAME_PARTS - identifier-clause-legacy: PARSE_SYNTAX_ERROR - -======================================================================================================================== -PATTERN ANALYSIS -======================================================================================================================== - - -Parse errors → Success: 24 tests --------------------------------------------------------------------------------- - #115: SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (... - #117: SELECT map('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)... - #118: SELECT named_struct('a', 1).IDENTIFIER('a') FROM VALUES(1) AS T(c1)... - #122: SELECT row_number() OVER IDENTIFIER('win') FROM VALUES(1) AS T(c1) WIN... - #123: SELECT row_number() OVER win FROM VALUES(1) AS T(c1) WINDOW IDENTIFIER... - #124: SELECT 1 AS IDENTIFIER('col1')... - #125: SELECT my_table.* FROM VALUES (1, 2) AS IDENTIFIER('my_table')(IDENTIF... - #126: WITH identifier('v')(identifier('c1')) AS (VALUES(1)) (SELECT c1 FROM ... - #127: CREATE OR REPLACE VIEW v(IDENTIFIER('c1')) AS VALUES(1)... - #129: CREATE TABLE tab(IDENTIFIER('c1') INT) USING CSV... - #130: INSERT INTO tab(IDENTIFIER('c1')) VALUES(1)... - #134: ALTER TABLE IDENTIFIER('tab') ADD COLUMN IDENTIFIER('c2') INT... - #137: ALTER TABLE IDENTIFIER('tab') RENAME TO IDENTIFIER('tab_renamed')... - #139: CREATE TABLE test_col_with_dot(IDENTIFIER('`col.with.dot`') INT) USING... - #156: ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column... - #167: SET VAR IDENTIFIER('my_var') = 'new_value'... - #170: CREATE TEMPORARY FUNCTION test_udf(IDENTIFIER('param1') INT, IDENTIFIE... - #173: CREATE TEMPORARY FUNCTION test_table_udf(IDENTIFIER('input_val') INT) -... - #206: EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDEN... - #210: EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDE... 
- #212: EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELEC... - #213: EXECUTE IMMEDIATE 'CREATE OR REPLACE TEMPORARY VIEW IDENTIFIER(:view_n... - #216: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(... - #218: EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' -... - -Parse errors → Different error: 12 tests --------------------------------------------------------------------------------- - #114: SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) W... - #119: SELECT * FROM s.IDENTIFIER('tab')... - #120: SELECT * FROM IDENTIFIER('s').IDENTIFIER('tab')... - #121: SELECT * FROM IDENTIFIER('s').tab... - #132: ALTER TABLE IDENTIFIER('tab') RENAME COLUMN IDENTIFIER('c1') TO IDENTI... - #136: ALTER TABLE IDENTIFIER('tab') DROP COLUMN IDENTIFIER('c2')... - #141: SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2)... - #142: SELECT 1 AS IDENTIFIER('col1.col2')... - #211: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''... - #217: EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) RENAME COLUMN IDENTIFI... - #219: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_... - #225: EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING... - -Success → Parse errors: 10 tests --------------------------------------------------------------------------------- - #128: SELECT c1 FROM v... - #131: SELECT c1 FROM tab... - #135: SELECT c2 FROM tab... - #138: SELECT * FROM tab_renamed... - #171: SELECT test_udf(5, 'hello')... - #172: DROP TEMPORARY FUNCTION test_udf... - #174: SELECT * FROM test_table_udf(42)... - #175: DROP TEMPORARY FUNCTION test_table_udf... - #214: EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col) FROM IDENTIFIER(:view)' - U... - #215: DROP VIEW test_view... 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out deleted file mode 100644 index 1271f730d1e5..000000000000 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate-legacy-identifier.sql.out +++ /dev/null @@ -1,1226 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -CREATE TEMPORARY VIEW tbl_view AS SELECT * FROM VALUES - (10, 'name1', named_struct('f1', 1, 's2', named_struct('f2', 101, 'f3', 'a'))), - (20, 'name2', named_struct('f1', 2, 's2', named_struct('f2', 202, 'f3', 'b'))), - (30, 'name3', named_struct('f1', 3, 's2', named_struct('f2', 303, 'f3', 'c'))), - (40, 'name4', named_struct('f1', 4, 's2', named_struct('f2', 404, 'f3', 'd'))), - (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))), - (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))), - (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g'))) -AS tbl_view(id, name, data) --- !query analysis -CreateViewCommand `tbl_view`, SELECT * FROM VALUES - (10, 'name1', named_struct('f1', 1, 's2', named_struct('f2', 101, 'f3', 'a'))), - (20, 'name2', named_struct('f1', 2, 's2', named_struct('f2', 202, 'f3', 'b'))), - (30, 'name3', named_struct('f1', 3, 's2', named_struct('f2', 303, 'f3', 'c'))), - (40, 'name4', named_struct('f1', 4, 's2', named_struct('f2', 404, 'f3', 'd'))), - (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))), - (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))), - (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g'))) -AS tbl_view(id, name, data), false, false, LocalTempView, UNSUPPORTED, true - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -CREATE TABLE x (id INT) 
USING csv --- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`x`, false - - --- !query -DECLARE sql_string STRING --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.sql_string - - --- !query -SET VAR sql_string = 'SELECT * from tbl_view where name = \'name1\'' --- !query analysis -SetVariable [variablereference(system.session.sql_string=CAST(NULL AS STRING))] -+- Project [SELECT * from tbl_view where name = 'name1' AS sql_string#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SET spark.sql.ansi.enabled=true' --- !query analysis -CommandResult [key#x, value#x], Execute SetCommand, [[spark.sql.ansi.enabled,true]] - +- SetCommand (spark.sql.ansi.enabled,Some(true)) - - --- !query -EXECUTE IMMEDIATE 'CREATE TEMPORARY VIEW IDENTIFIER(:tblName) AS SELECT id, name FROM tbl_view' USING 'tbl_view_tmp' as tblName --- !query analysis -CommandResult Execute CreateViewCommand - +- CreateViewCommand `tbl_view_tmp`, SELECT id, name FROM tbl_view, false, false, LocalTempView, UNSUPPORTED, true - +- Project [id#x, name#x] - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view_tmp' --- !query analysis -Project [id#x, name#x] -+- SubqueryAlias tbl_view_tmp - +- View (`tbl_view_tmp`, [id#x, name#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x] - +- Project [id#x, name#x] - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias 
tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'REFRESH TABLE IDENTIFIER(:tblName)' USING 'x' as tblName --- !query analysis -CommandResult Execute RefreshTableCommand - +- RefreshTableCommand `spark_catalog`.`default`.`x` - - --- !query -EXECUTE IMMEDIATE sql_string --- !query analysis -Project [id#x, name#x, data#x] -+- Filter (name#x = name1) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = \'name1\'' --- !query analysis -Project [id#x, name#x, data#x] -+- Filter (name#x = name1) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -SET VAR sql_string = 'SELECT * from tbl_view where name = ? or name = ?' --- !query analysis -SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = \'name1\'')] -+- Project [SELECT * from tbl_view where name = ? or name = ? 
AS sql_string#x] - +- OneRowRelation - - --- !query -DECLARE a STRING --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.a - - --- !query -SET VAR a = 'name1' --- !query analysis -SetVariable [variablereference(system.session.a=CAST(NULL AS STRING))] -+- Project [name1 AS a#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE sql_string USING 'name1', 'name3' --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name1) OR (name#x = name3)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE sql_string USING a, 'name2' --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name1) OR (name#x = name2)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING 'name1', 'name3' --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name1) OR (name#x = name3)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' 
USING a, 'name2' --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name1) OR (name#x = name2)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = ? or name = ?' USING (a, 'name2') --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name1) OR (name#x = name2)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'INSERT INTO x VALUES(?)' USING 1 --- !query analysis -CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/x, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/x], Append, `spark_catalog`.`default`.`x`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/x), [id] - +- InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/x, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/x], Append, `spark_catalog`.`default`.`x`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/x), [id] - +- Project [col1#x AS id#x] - +- LocalRelation [col1#x] - - --- !query -SELECT * from x --- !query analysis -Project [id#x] -+- SubqueryAlias spark_catalog.default.x - +- Relation spark_catalog.default.x[id#x] csv - - --- !query -SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' --- !query analysis 
-SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = ? or name = ?')] -+- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] - +- OneRowRelation - - --- !query -DECLARE b INT --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.b - - --- !query -SET VAR b = 40 --- !query analysis -SetVariable [variablereference(system.session.b=CAST(NULL AS INT))] -+- Project [40 AS b#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE sql_string USING 40 as second, 'name7' as first --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name7) OR (id#x = 40)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE sql_string USING b as second, 'name7' as first --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name7) OR (id#x = 40)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 40 as second, 'name7' as first --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name7) OR (id#x = 40)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- 
SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first or id = :second' USING 'name7' as first, b as second --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((name#x = name7) OR (id#x = 40)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT tbl_view.*, :first as p FROM tbl_view WHERE name = :first' USING 'name7' as first --- !query analysis -Project [id#x, name#x, data#x, name7 AS p#x] -+- Filter (name#x = name7) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SET VAR sql_string = ?' 
USING 'SELECT id from tbl_view where name = :first' --- !query analysis -CommandResult SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] - +- SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] - +- Project [SELECT id from tbl_view where name = :first AS sql_string#x] - +- OneRowRelation - - --- !query -SELECT sql_string --- !query analysis -Project [variablereference(system.session.sql_string='SELECT id from tbl_view where name = :first') AS sql_string#x] -+- OneRowRelation - - --- !query -DECLARE res_id INT --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.res_id - - --- !query -EXECUTE IMMEDIATE sql_string INTO res_id USING 'name7' as first --- !query analysis -SetVariable [variablereference(system.session.res_id=CAST(NULL AS INT))] -+- GlobalLimit 2 - +- LocalLimit 2 - +- Project [id#x] - +- Filter (name#x = name7) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -SELECT res_id --- !query analysis -Project [variablereference(system.session.res_id=70) AS res_id#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE sql_string INTO res_id USING a as first --- !query analysis -SetVariable [variablereference(system.session.res_id=70)] -+- GlobalLimit 2 - +- LocalLimit 2 - +- Project [id#x] - +- Filter (name#x = name1) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- 
SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -SELECT res_id --- !query analysis -Project [variablereference(system.session.res_id=10) AS res_id#x] -+- OneRowRelation - - --- !query -SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' --- !query analysis -SetVariable [variablereference(system.session.sql_string='SELECT id from tbl_view where name = :first')] -+- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT 42' INTO res_id --- !query analysis -SetVariable [variablereference(system.session.res_id=10)] -+- Project [42 AS 42#x] - +- OneRowRelation - - --- !query -SELECT res_id --- !query analysis -Project [variablereference(system.session.res_id=42) AS res_id#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO b, a USING 10 --- !query analysis -SetVariable [variablereference(system.session.b=40), variablereference(system.session.a='name1')] -+- GlobalLimit 2 - +- LocalLimit 2 - +- Project [id#x, name#x] - +- Filter (id#x = 10) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -SELECT b, a --- !query analysis -Project [variablereference(system.session.b=10) AS b#x, variablereference(system.session.a='name1') AS a#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where id = ? AND name = ?' 
USING b as first, a --- !query analysis -Project [id#x, name#x, data#x] -+- Filter ((id#x = 10) AND (name#x = name1)) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT 42 WHERE 2 = 1' INTO res_id --- !query analysis -SetVariable [variablereference(system.session.res_id=42)] -+- Project [42 AS 42#x] - +- Filter (2 = 1) - +- OneRowRelation - - --- !query -SELECT res_id --- !query analysis -Project [variablereference(system.session.res_id=CAST(NULL AS INT)) AS res_id#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT \'1707\'' INTO res_id --- !query analysis -SetVariable [variablereference(system.session.res_id=CAST(NULL AS INT))] -+- Project [cast(1707#x as int) AS res_id#x] - +- Project [1707 AS 1707#x] - +- OneRowRelation - - --- !query -SELECT res_id --- !query analysis -Project [variablereference(system.session.res_id=1707) AS res_id#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT \'invalid_cast_error_expected\'' INTO res_id --- !query analysis -org.apache.spark.SparkNumberFormatException -{ - "errorClass" : "CAST_INVALID_INPUT", - "sqlState" : "22018", - "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "expression" : "'invalid_cast_error_expected'", - "sourceType" : "\"STRING\"", - "targetType" : "\"INT\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 70, - "fragment" : "EXECUTE IMMEDIATE 'SELECT \\'invalid_cast_error_expected\\'' INTO res_id" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'INSERT INTO x VALUES (?)' INTO res_id USING 1 --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_STATEMENT_FOR_EXECUTE_INTO", - "sqlState" : "07501", - 
"messageParameters" : { - "sqlString" : "INSERT INTO X VALUES (?)" - } -} - - --- !query -DECLARE OR REPLACE testvarA INT --- !query analysis -CreateVariable defaultvalueexpression(null, null), true -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.testvarA - - --- !query -EXECUTE IMMEDIATE 'SET VAR testVarA = 1' INTO testVarA --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_STATEMENT_FOR_EXECUTE_INTO", - "sqlState" : "07501", - "messageParameters" : { - "sqlString" : "SET VAR TESTVARA = 1" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view WHERE ? = id' USING id --- !query analysis -org.apache.spark.sql.catalyst.ExtendedAnalysisException -{ - "errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", - "sqlState" : "42703", - "messageParameters" : { - "objectName" : "`id`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 63, - "stopIndex" : 64, - "fragment" : "id" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where ? 
= id and :first = name' USING 1 as x, 'name2' as first --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_QUERY_MIXED_QUERY_PARAMETERS", - "sqlState" : "42613" -} - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where :x = id and :first = name' USING 1, 'name2' as first --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", - "sqlState" : "07001", - "messageParameters" : { - "exprs" : "\"1\"" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT * FROM tbl_view where :first = name' USING 1, 'name2' as first --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", - "sqlState" : "07001", - "messageParameters" : { - "exprs" : "\"1\"" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELCT Fa' --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'SELCT'", - "hint" : "" - }, - "queryContext" : [ { - "objectType" : "EXECUTE IMMEDIATE", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 8, - "fragment" : "SELCT Fa" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'SELCT Fa' INTO res_id --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'SELCT'", - "hint" : "" - }, - "queryContext" : [ { - "objectType" : "EXECUTE IMMEDIATE", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 8, - "fragment" : "SELCT Fa" - } ] -} - - --- !query -EXECUTE IMMEDIATE b --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", - "sqlState" : "42K09", - "messageParameters" : { - "exprType" : "\"INT\"" - } -} - - --- !query -SET VAR sql_string = 'SELECT * from tbl_view where name = :first or id = :second' --- !query analysis 
-SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] -+- Project [SELECT * from tbl_view where name = :first or id = :second AS sql_string#x] - +- OneRowRelation - - --- !query -SET VAR a = 'na' --- !query analysis -SetVariable [variablereference(system.session.a='name1')] -+- Project [na AS a#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING CONCAT(a , "me1") as first --- !query analysis -Project [id#x, name#x, data#x] -+- Filter (name#x = name1) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT * from tbl_view where name = :first' USING (SELECT 42) as first, 'name2' as second --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_EXPR_FOR_PARAMETER", - "sqlState" : "42K0E", - "messageParameters" : { - "invalidExprSql" : "\"scalarsubquery()\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 70, - "stopIndex" : 80, - "fragment" : "(SELECT 42)" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO a, b USING 10 --- !query analysis -org.apache.spark.SparkNumberFormatException -{ - "errorClass" : "CAST_INVALID_INPUT", - "sqlState" : "22018", - "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "expression" : "'name1'", - "sourceType" : "\"STRING\"", - "targetType" : "\"INT\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 81, - "fragment" : "EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' 
INTO a, b USING 10" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id, name FROM tbl_view WHERE id = ?' INTO (a, b) USING 10 --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'('", - "hint" : "" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id --- !query analysis -org.apache.spark.SparkException -{ - "errorClass" : "ROW_SUBQUERY_TOO_MANY_ROWS", - "sqlState" : "21000" -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view' INTO res_id --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ASSIGNMENT_ARITY_MISMATCH", - "sqlState" : "42802", - "messageParameters" : { - "numExpr" : "2", - "numTarget" : "1" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id FROM tbl_view' INTO res_id, b --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ASSIGNMENT_ARITY_MISMATCH", - "sqlState" : "42802", - "messageParameters" : { - "numExpr" : "1", - "numTarget" : "2" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :first' USING 10 as first, 20 as first --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "EXEC_IMMEDIATE_DUPLICATE_ARGUMENT_ALIASES", - "sqlState" : "42701", - "messageParameters" : { - "aliases" : "`first`" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 63, - "stopIndex" : 92, - "fragment" : "USING 10 as first, 20 as first" - } ] -} - - --- !query -DECLARE p = 10 --- !query analysis -CreateVariable defaultvalueexpression(10, 10), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.p - - --- !query -EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p --- !query analysis -Project [id#x] -+- Filter (id#x = 10) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, 
name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -EXECUTE IMMEDIATE 'SELECT id FROM tbl_view WHERE id = :p' USING p, 'p' --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", - "sqlState" : "07001", - "messageParameters" : { - "exprs" : "\"p\"" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT id, data.f1 FROM tbl_view WHERE id = 10' INTO res_id, res_id --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "DUPLICATE_ASSIGNMENTS", - "sqlState" : "42701", - "messageParameters" : { - "nameList" : "`res_id`" - } -} - - --- !query -EXECUTE IMMEDIATE 'EXECUTE IMMEDIATE \'SELECT id FROM tbl_view WHERE id = ?\' USING 10' --- !query analysis -Project [id#x] -+- Filter (id#x = 10) - +- SubqueryAlias tbl_view - +- View (`tbl_view`, [id#x, name#x, data#x]) - +- Project [cast(id#x as int) AS id#x, cast(name#x as string) AS name#x, cast(data#x as struct>) AS data#x] - +- Project [id#x, name#x, data#x] - +- SubqueryAlias tbl_view - +- LocalRelation [id#x, name#x, data#x] - - --- !query -SET VAR sql_string = null --- !query analysis -SetVariable [variablereference(system.session.sql_string='SELECT * from tbl_view where name = :first or id = :second')] -+- Project [cast(sql_string#x as string) AS sql_string#x] - +- Project [null AS sql_string#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE sql_string --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "NULL_QUERY_STRING_EXECUTE_IMMEDIATE", - "sqlState" : "22004", - "messageParameters" : { - "varName" : "`sql_string`" - } -} - - --- !query -SET VAR sql_string = 5 --- !query analysis -SetVariable [variablereference(system.session.sql_string=CAST(NULL AS STRING))] -+- Project [cast(sql_string#x as string) AS 
sql_string#x] - +- Project [5 AS sql_string#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE sql_string --- !query analysis -org.apache.spark.sql.catalyst.parser.ParseException -{ - "errorClass" : "PARSE_SYNTAX_ERROR", - "sqlState" : "42601", - "messageParameters" : { - "error" : "'5'", - "hint" : "" - }, - "queryContext" : [ { - "objectType" : "EXECUTE IMMEDIATE", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 1, - "fragment" : "5" - } ] -} - - --- !query -SET VAR sql_string = 'hello' --- !query analysis -SetVariable [variablereference(system.session.sql_string='5')] -+- Project [hello AS sql_string#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE length(sql_string) --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", - "sqlState" : "42K09", - "messageParameters" : { - "exprType" : "\"INT\"" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT 42 where ? = :first' USING 1, 2 as first --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_QUERY_MIXED_QUERY_PARAMETERS", - "sqlState" : "42613" -} - - --- !query -DECLARE int_var INT --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.int_var - - --- !query -SET VAR int_var = 42 --- !query analysis -SetVariable [variablereference(system.session.int_var=CAST(NULL AS INT))] -+- Project [42 AS int_var#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE int_var --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", - "sqlState" : "42K09", - "messageParameters" : { - "exprType" : "\"INT\"" - } -} - - --- !query -DECLARE null_var STRING --- !query analysis -CreateVariable defaultvalueexpression(null, null), false -+- ResolvedIdentifier 
org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.null_var - - --- !query -SET VAR null_var = null --- !query analysis -SetVariable [variablereference(system.session.null_var=CAST(NULL AS STRING))] -+- Project [cast(null_var#x as string) AS null_var#x] - +- Project [null AS null_var#x] - +- OneRowRelation - - --- !query -EXECUTE IMMEDIATE null_var --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "NULL_QUERY_STRING_EXECUTE_IMMEDIATE", - "sqlState" : "22004", - "messageParameters" : { - "varName" : "`null_var`" - } -} - - --- !query -EXECUTE IMMEDIATE 'SELECT ?' USING (SELECT 1) --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_EXPR_FOR_PARAMETER", - "sqlState" : "42K0E", - "messageParameters" : { - "invalidExprSql" : "\"scalarsubquery()\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 36, - "stopIndex" : 45, - "fragment" : "(SELECT 1)" - } ] -} - - --- !query -EXECUTE IMMEDIATE 'SELECT :first' USING 2, 3 --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "ALL_PARAMETERS_MUST_BE_NAMED", - "sqlState" : "07001", - "messageParameters" : { - "exprs" : "\"2\", \"3\"" - } -} - - --- !query -EXECUTE IMMEDIATE (SELECT c FROM (VALUES(1)) AS T(c)) --- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE", - "sqlState" : "42K09", - "messageParameters" : { - "exprType" : "\"INT\"" - } -} - - --- !query -DROP TABLE x --- !query analysis -DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.x - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5 AS p --- !query analysis -Project [typeof(5) AS type#x, 5 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5L AS p --- !query analysis -Project [typeof(5) AS type#x, 5 AS val#xL] -+- 
OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5S AS p --- !query analysis -Project [typeof(5) AS type#x, 5 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 5Y AS p --- !query analysis -Project [typeof(5) AS type#x, 5 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 3.14F AS p --- !query analysis -Project [typeof(cast(3.14 as float)) AS type#x, cast(3.14 as float) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 3.14159D AS p --- !query analysis -Project [typeof(3.14159) AS type#x, 3.14159 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 123.45BD AS p --- !query analysis -Project [typeof(123.45) AS type#x, 123.45 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING true AS p --- !query analysis -Project [typeof(true) AS type#x, true AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING false AS p --- !query analysis -Project [typeof(false) AS type#x, false AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 'hello world' AS p --- !query analysis -Project [typeof(hello world) AS type#x, hello world AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 'it''s a test' AS p --- !query analysis -Project [typeof(it's a test) AS type#x, it's a test AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING DATE '2023-12-25' AS p --- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING TIMESTAMP '2023-12-25 10:30:45' AS p --- !query 
analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING TIMESTAMP_NTZ '2023-12-25 10:30:45' AS p --- !query analysis -Project [typeof(2023-12-25 10:30:45) AS type#x, 2023-12-25 10:30:45 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING CAST(NULL AS INT) AS p --- !query analysis -Project [typeof(cast(null as int)) AS type#x, cast(null as int) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING CAST(NULL AS STRING) AS p --- !query analysis -Project [typeof(cast(null as string)) AS type#x, cast(null as string) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, hex(:p) as val' USING X'010203FF' AS p --- !query analysis -Project [typeof(0x010203FF) AS type#x, hex(0x010203FF) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '3' DAY AS p --- !query analysis -Project [typeof(INTERVAL '3' DAY) AS type#x, INTERVAL '3' DAY AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '2' YEAR AS p --- !query analysis -Project [typeof(INTERVAL '2' YEAR) AS type#x, INTERVAL '2' YEAR AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '1-2' YEAR TO MONTH AS p --- !query analysis -Project [typeof(INTERVAL '1-2' YEAR TO MONTH) AS type#x, INTERVAL '1-2' YEAR TO MONTH AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING INTERVAL '3 4:5:6' DAY TO SECOND AS p --- !query analysis -Project [typeof(INTERVAL '3 04:05:06' DAY TO SECOND) AS type#x, INTERVAL '3 04:05:06' DAY TO SECOND AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING 999.999BD AS p --- !query 
analysis -Project [typeof(999.999) AS type#x, 999.999 AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p1) as type1, :p1 as val1, typeof(:p2) as type2, :p2 as val2' - USING 42 as p1, 'test string' as p2 --- !query analysis -Project [typeof(42) AS type1#x, 42 AS val1#x, typeof(test string) AS type2#x, test string AS val2#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY(1, 2, 3) AS p --- !query analysis -Project [typeof(array(1, 2, 3)) AS type#x, array(1, 2, 3) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY('a', 'b', 'c') AS p --- !query analysis -Project [typeof(array(a, b, c)) AS type#x, array(a, b, c) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING ARRAY(ARRAY(1, 2), ARRAY(3, 4)) AS p --- !query analysis -Project [typeof(array(array(1, 2), array(3, 4))) AS type#x, array(array(1, 2), array(3, 4)) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP('key1', 'value1', 'key2', 'value2') AS p --- !query analysis -Project [typeof(map(key1, value1, key2, value2)) AS type#x, map(key1, value1, key2, value2) AS val#x] -+- OneRowRelation - - --- !query -EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP(1, 'one', 2, 'two') AS p --- !query analysis -Project [typeof(map(1, one, 2, two)) AS type#x, map(1, one, 2, two) AS val#x] -+- OneRowRelation From 733f8746ac809def3691af0a27bcee50f6c0e13b Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 5 Nov 2025 21:09:15 -0800 Subject: [PATCH 13/37] remove unecessary testcases from ParametersSuite move imports to head --- .../sql/catalyst/parser/AstBuilder.scala | 2 - .../sql/catalyst/parser/ParserUtils.scala | 2 - .../spark/sql/execution/SparkStrategies.scala | 10 +- .../apache/spark/sql/ParametersSuite.scala | 130 ------------------ 4 files 
changed, 4 insertions(+), 140 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b355964a781d..d6227a4f3bb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -6031,8 +6031,6 @@ class AstBuilder extends DataTypeAstBuilder * }}} */ override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - val query = Option(ctx.query).map(plan) withIdentClause(ctx.identifierReference, query.toSeq, (ident, children) => { if (query.isDefined && ident.length > 1) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index a14e77d6b558..fdaefd95ba57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -121,8 +121,6 @@ object ParserUtils extends SparkParserUtils { * Mirrors the string extraction logic used in DataTypeAstBuilder. 
*/ private def extractStringLiteralValue(ctx: SqlBaseParser.StringLitContext): String = { - import scala.jdk.CollectionConverters._ - if (ctx == null) { return "" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 9df357d1f270..ef6eec86f7ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -23,6 +23,7 @@ import org.apache.spark.{SparkException, SparkUnsupportedOperationException} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{execution, AnalysisException} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NamedRelation} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide, JoinSelectionHelper, NormalizeFloatingNumbers} @@ -31,12 +32,14 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRelationV2} import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.{SparkStrategy => Strategy} import org.apache.spark.sql.execution.aggregate.AggUtils import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{WriteFiles, WriteFilesExec} +import org.apache.spark.sql.execution.datasources.{LogicalRelation, WriteFiles, WriteFilesExec} +import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.exchange.{REBALANCE_PARTITIONS_BY_COL, REBALANCE_PARTITIONS_BY_NONE, REPARTITION_BY_COL, REPARTITION_BY_NUM, ShuffleExchangeExec} import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.python.streaming.{FlatMapGroupsInPandasWithStateExec, TransformWithStateInPySparkExec} @@ -1112,11 +1115,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { * Extracts a user-friendly table name from a logical plan for error messages. */ private def extractTableNameForError(table: LogicalPlan): String = { - import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NamedRelation} - import org.apache.spark.sql.execution.datasources.LogicalRelation - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - val unwrapped = EliminateSubqueryAliases(table) unwrapped match { // Check specific types before NamedRelation since they extend it diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala index f66eb9df4caf..e30b48fdb176 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala @@ -2375,133 +2375,3 @@ class ParametersSuite extends QueryTest with SharedSparkSession { ) } } - -class IdentifierWithParametersSuite extends QueryTest with SharedSparkSession { - import testImplicits._ - - test("IDENTIFIER with parameter - table reference") { - // Test IDENTIFIER with parameters that get substituted before parse - // This tests: parameter substitution -> identifier-lite parse-time resolution - withTable("test_table") { - spark.range(5).write.saveAsTable("test_table") - - checkAnswer( - spark.sql("SELECT * FROM IDENTIFIER(:table_name) ORDER BY id", - 
Map("table_name" -> "test_table")), - Seq(Row(0), Row(1), Row(2), Row(3), Row(4)) - ) - } - } - - test("IDENTIFIER with parameter - column reference in SELECT list") { - // Test IDENTIFIER with parameters for column names in SELECT list - val df = Seq((1, "a"), (2, "b"), (3, "c")).toDF("col1", "col2") - df.createOrReplaceTempView("test_view") - - checkAnswer( - spark.sql("SELECT IDENTIFIER(:col_name) FROM test_view", - Map("col_name" -> "col1")), - Seq(Row(1), Row(2), Row(3)) - ) - - checkAnswer( - spark.sql("SELECT IDENTIFIER(:c1), IDENTIFIER(:c2) FROM test_view", - Map("c1" -> "col1", "c2" -> "col2")), - Seq(Row(1, "a"), Row(2, "b"), Row(3, "c")) - ) - } - - test("IDENTIFIER with parameter - qualified table name") { - // Test IDENTIFIER with parameters for qualified identifiers - withTable("test_qualified") { - spark.sql("CREATE TABLE test_qualified (c1 INT) USING parquet") - spark.sql("INSERT INTO test_qualified VALUES (42)") - - checkAnswer( - spark.sql("SELECT * FROM IDENTIFIER(:qual_table)", - Map("qual_table" -> "default.test_qualified")), - Seq(Row(42)) - ) - } - } - - test("IDENTIFIER with parameter - backticked identifier with spaces") { - // Test IDENTIFIER with parameters for backticked identifiers - val df = Seq((1, 2)).toDF("col 1", "col 2") - df.createOrReplaceTempView("test_view2") - - checkAnswer( - spark.sql("SELECT IDENTIFIER(:col_name) FROM test_view2", - Map("col_name" -> "`col 1`")), - Seq(Row(1)) - ) - } - - test("IDENTIFIER with parameter - DDL statements") { - // Test IDENTIFIER with parameters in CREATE/DROP TABLE statements - val tableName = "param_table" - withTable(tableName) { - spark.sql("CREATE TABLE IDENTIFIER(:tbl) (c1 INT) USING parquet", - Map("tbl" -> tableName)) - spark.sql("INSERT INTO IDENTIFIER(:tbl) VALUES (100)", - Map("tbl" -> tableName)) - - checkAnswer( - spark.sql("SELECT * FROM IDENTIFIER(:tbl)", Map("tbl" -> tableName)), - Seq(Row(100)) - ) - - spark.sql("DROP TABLE IDENTIFIER(:tbl)", Map("tbl" -> tableName)) - } - } 
- - test("IDENTIFIER with parameter - function names") { - // Test IDENTIFIER with parameters for function references - checkAnswer( - spark.sql("SELECT IDENTIFIER(:func_name)(-5)", Map("func_name" -> "abs")), - Seq(Row(5)) - ) - - checkAnswer( - spark.sql("SELECT IDENTIFIER(:func)('hello')", Map("func" -> "upper")), - Seq(Row("HELLO")) - ) - } - - test("IDENTIFIER with parameter - column reference in WHERE clause") { - // Test IDENTIFIER with parameters in WHERE clause - val df = Seq((1, "a"), (2, "b"), (3, "c")).toDF("col1", "col2") - df.createOrReplaceTempView("test_view3") - - checkAnswer( - spark.sql("SELECT * FROM test_view3 WHERE IDENTIFIER(:col) > 1", - Map("col" -> "col1")), - Seq(Row(2, "b"), Row(3, "c")) - ) - } - - test("IDENTIFIER with parameter - column reference in GROUP BY") { - // Test IDENTIFIER with parameters in GROUP BY clause - val df = Seq((1, "a"), (1, "b"), (2, "c")).toDF("col1", "col2") - df.createOrReplaceTempView("test_view4") - - checkAnswer( - spark.sql( - "SELECT IDENTIFIER(:col), COUNT(*) FROM test_view4 GROUP BY IDENTIFIER(:col)", - Map("col" -> "col1")), - Seq(Row(1, 2), Row(2, 1)) - ) - } - - test("IDENTIFIER with parameter - column reference in ORDER BY") { - // Test IDENTIFIER with parameters in ORDER BY clause - val df = Seq((3, "a"), (1, "b"), (2, "c")).toDF("col1", "col2") - df.createOrReplaceTempView("test_view5") - - checkAnswer( - spark.sql("SELECT * FROM test_view5 ORDER BY IDENTIFIER(:col)", - Map("col" -> "col1")), - Seq(Row(1, "b"), Row(2, "c"), Row(3, "a")) - ) - } -} From c43af8f62cec54cfaa57fa0039b628ecabd671a0 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 07:44:53 -0800 Subject: [PATCH 14/37] Make identifier-clause.sql less flaky --- .../identifier-clause-legacy.sql.out | 133 ++++++++------- .../identifier-clause.sql.out | 151 ++++++++++-------- .../sql-tests/inputs/identifier-clause.sql | 19 +-- .../results/identifier-clause-legacy.sql.out | 70 ++++---- .../results/identifier-clause.sql.out | 
74 +++++---- 5 files changed, 241 insertions(+), 206 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index 7e4ece419983..06836a0a9b04 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -1514,16 +1514,30 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE SCHEMA identifier_clause_test_schema +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] + + +-- !query +USE identifier_clause_test_schema +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] + + -- !query CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_show`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_show`, false -- !query -SHOW VIEWS IN IDENTIFIER('default') +SHOW VIEWS IN IDENTIFIER('identifier_clause_test_schema') -- !query analysis -ShowViewsCommand default, [namespace#x, viewName#x, isTemporary#x] +ShowViewsCommand identifier_clause_test_schema, [namespace#x, viewName#x, isTemporary#x] -- !query @@ -1534,7 +1548,7 @@ org.apache.spark.sql.AnalysisException "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", "sqlState" : "42601", "messageParameters" : { - "name" : "`spark_catalog`.`default`.`test_show`" + "name" : "`spark_catalog`.`identifier_clause_test_schema`.`test_show`" } } @@ -1543,64 +1557,64 @@ org.apache.spark.sql.AnalysisException SHOW CREATE TABLE IDENTIFIER('test_show') -- !query analysis ShowCreateTable false, [createtab_stmt#x] -+- ResolvedTable 
V2SessionCatalog(spark_catalog), default.test_show, V1Table(default.test_show), [c1#x, c2#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_show, V1Table(identifier_clause_test_schema.test_show), [c1#x, c2#x] -- !query DROP TABLE test_show -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_show ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_show -- !query CREATE TABLE test_desc(c1 INT) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_desc`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false -- !query DESCRIBE TABLE IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false, [col_name#x, data_type#x, comment#x] -- !query DESCRIBE FORMATTED IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, true, [col_name#x, data_type#x, comment#x] -- !query DESCRIBE EXTENDED IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, true, [col_name#x, data_type#x, comment#x] -- !query DESC IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false, [col_name#x, data_type#x, comment#x] -- !query DROP TABLE test_desc -- !query analysis DropTable false, false 
-+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_desc ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_desc -- !query CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_comment`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_comment`, false -- !query COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' -- !query analysis CommentOnTable table comment -+- ResolvedTable V2SessionCatalog(spark_catalog), default.test_comment, V1Table(default.test_comment), [c1#x, c2#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_comment, V1Table(identifier_clause_test_schema.test_comment), [c1#x, c2#x] -- !query @@ -1621,14 +1635,7 @@ org.apache.spark.sql.catalyst.parser.ParseException DROP TABLE test_comment -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_comment - - --- !query -CREATE SCHEMA identifier_clause_test_schema --- !query analysis -CreateNamespace false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_comment -- !query @@ -1668,13 +1675,6 @@ DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_table --- !query -DROP SCHEMA identifier_clause_test_schema --- !query analysis -DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] - - -- !query DECLARE IDENTIFIER('my_var') = 'value' -- !query analysis @@ -1735,7 +1735,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42883", "messageParameters" : { "routineName" : "`test_udf`", - "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + "searchPath" 
: "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" }, "queryContext" : [ { "objectType" : "", @@ -1835,24 +1835,25 @@ Project [c1#x.c2 AS c1.c2#x] -- !query CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, false -- !query INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test), [c1, c2] +- Project [col1#x AS c1#x, col2#x AS c2#x] +- LocalRelation [col1#x, col2#x] -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table -- !query analysis -Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv +Sort [c1#x ASC NULLS 
FIRST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1860,8 +1861,8 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') F USING 'c' AS prefix -- !query analysis Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv ++- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1870,20 +1871,20 @@ EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' -- !query analysis Project [c1#x, c2#x] +- Filter (c1#x = 1) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test2`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`integration_test2`, false -- !query INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test2], Append, `spark_catalog`.`default`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test2), [c1, c3] +InsertIntoHadoopFsRelationCommand file:[not included in 
comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2), [c1, c3] +- Project [col1#x AS c1#x, col2#x AS c3#x] +- LocalRelation [col1#x, col2#x] @@ -1919,8 +1920,8 @@ Project [c1#x, c2#x, rn#x] +- Project [c1#x, c2#x, rn#x, rn#x] +- Window [row_number() windowspecdefinition(c2#x, c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#x], [c2#x], [c1#x ASC NULLS FIRST] +- Project [c1#x, c2#x] - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1928,8 +1929,8 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query analysis Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] -+- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv ++- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1938,8 +1939,8 @@ EXECUTE IMMEDIATE 'SELECT * FROM integration_test ORDER BY IDENTIFIER(:col1) DES -- !query analysis Sort [c1#x DESC NULLS LAST, c2#x ASC NULLS FIRST], true +- Project [c1#x, c2#x] - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- 
SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1966,7 +1967,7 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias + USING 'identifier_clause_test_schema' AS schema, 'my_table' AS table, 't' AS alias -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { @@ -2059,7 +2060,7 @@ org.apache.spark.sql.catalyst.analysis.NoSuchTableException "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", "sqlState" : "42P01", "messageParameters" : { - "relationName" : "`spark_catalog`.`default`.`test_view`" + "relationName" : "`spark_catalog`.`identifier_clause_test_schema`.`test_view`" } } @@ -2155,28 +2156,29 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab + USING 'c1' AS col1, 'c' AS p, 'identifier_clause_test_schema' AS schema, 'integration_test' AS tab -- !query analysis Sort [c1#x ASC NULLS FIRST], true +- Project [c1#x, c2#x] +- Filter (c1#x > 0) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' - USING 'default' AS schema, 'integration_test' AS table, 
'integration_test' AS tab_alias + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query analysis -Project [c1#x, c2#x] -+- Filter (c1#x > 0) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col + USING 'identifier_clause_test_schema' AS schema, 'col1' AS col -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { @@ -2200,11 +2202,18 @@ org.apache.spark.sql.catalyst.parser.ParseException DROP TABLE integration_test -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test -- !query DROP TABLE integration_test2 -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test2 ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test2 + + +-- !query +DROP SCHEMA identifier_clause_test_schema +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 00e364d8717f..40586eeadfc9 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1430,16 +1430,30 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE SCHEMA identifier_clause_test_schema +-- !query analysis +CreateNamespace false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] + + +-- !query +USE identifier_clause_test_schema +-- !query analysis +SetCatalogAndNamespace ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] + + -- !query CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_show`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_show`, false -- !query -SHOW VIEWS IN IDENTIFIER('default') +SHOW VIEWS IN IDENTIFIER('identifier_clause_test_schema') -- !query analysis -ShowViewsCommand default, [namespace#x, viewName#x, isTemporary#x] +ShowViewsCommand identifier_clause_test_schema, [namespace#x, viewName#x, isTemporary#x] -- !query @@ -1450,7 +1464,7 @@ org.apache.spark.sql.AnalysisException "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", "sqlState" : "42601", "messageParameters" : { - "name" : "`spark_catalog`.`default`.`test_show`" + "name" : "`spark_catalog`.`identifier_clause_test_schema`.`test_show`" } } @@ -1459,84 +1473,77 @@ org.apache.spark.sql.AnalysisException SHOW CREATE TABLE IDENTIFIER('test_show') -- !query analysis ShowCreateTable false, [createtab_stmt#x] -+- ResolvedTable V2SessionCatalog(spark_catalog), default.test_show, V1Table(default.test_show), [c1#x, c2#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_show, V1Table(identifier_clause_test_schema.test_show), [c1#x, c2#x] -- !query DROP TABLE test_show -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_show ++- ResolvedIdentifier 
V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_show -- !query CREATE TABLE test_desc(c1 INT) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_desc`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false -- !query DESCRIBE TABLE IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false, [col_name#x, data_type#x, comment#x] -- !query DESCRIBE FORMATTED IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, true, [col_name#x, data_type#x, comment#x] -- !query DESCRIBE EXTENDED IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, true, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, true, [col_name#x, data_type#x, comment#x] -- !query DESC IDENTIFIER('test_desc') -- !query analysis -DescribeTableCommand `spark_catalog`.`default`.`test_desc`, false, [col_name#x, data_type#x, comment#x] +DescribeTableCommand `spark_catalog`.`identifier_clause_test_schema`.`test_desc`, false, [col_name#x, data_type#x, comment#x] -- !query DROP TABLE test_desc -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_desc ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_desc -- !query CREATE TABLE test_comment(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`test_comment`, false +CreateDataSourceTableCommand 
`spark_catalog`.`identifier_clause_test_schema`.`test_comment`, false -- !query COMMENT ON TABLE IDENTIFIER('test_comment') IS 'table comment' -- !query analysis CommentOnTable table comment -+- ResolvedTable V2SessionCatalog(spark_catalog), default.test_comment, V1Table(default.test_comment), [c1#x, c2#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_comment, V1Table(identifier_clause_test_schema.test_comment), [c1#x, c2#x] -- !query ALTER TABLE test_comment ALTER COLUMN IDENTIFIER('c1') COMMENT 'column comment' -- !query analysis -AlterTableChangeColumnCommand `spark_catalog`.`default`.`test_comment`, c1, StructField(c1,IntegerType,true) +AlterTableChangeColumnCommand `spark_catalog`.`identifier_clause_test_schema`.`test_comment`, c1, StructField(c1,IntegerType,true) -- !query DROP TABLE test_comment -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.test_comment - - --- !query -CREATE SCHEMA identifier_clause_test_schema --- !query analysis -CreateNamespace false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_comment -- !query @@ -1576,13 +1583,6 @@ DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.test_table --- !query -DROP SCHEMA identifier_clause_test_schema --- !query analysis -DropNamespace false, false -+- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] - - -- !query DECLARE IDENTIFIER('my_var') = 'value' -- !query analysis @@ -1685,24 +1685,25 @@ Project [c1#x.c2 AS c1.c2#x] -- !query CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, 
false -- !query INSERT INTO integration_test VALUES (1, 'a'), (2, 'b') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test), [c1, c2] +- Project [col1#x AS c1#x, col2#x AS c2#x] +- LocalRelation [col1#x, col2#x] -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table -- !query analysis -Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1710,8 +1711,8 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') F USING 'c' AS prefix -- !query analysis Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.default.integration_test - 
+- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv ++- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1720,20 +1721,20 @@ EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' -- !query analysis Project [c1#x, c2#x] +- Filter (c1#x = 1) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV -- !query analysis -CreateDataSourceTableCommand `spark_catalog`.`default`.`integration_test2`, false +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`integration_test2`, false -- !query INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test2], Append, `spark_catalog`.`default`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test2), [c1, c3] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test2), [c1, c3] +- Project [col1#x AS c1#x, col2#x 
AS c3#x] +- LocalRelation [col1#x, col2#x] @@ -1746,11 +1747,11 @@ Project [c1#x, c2#x, c1#x, c3#x] +- Project [c1#x, c2#x, c3#x, c1#x] +- Join Inner, (c1#x = c1#x) :- SubqueryAlias t1 - : +- SubqueryAlias spark_catalog.default.integration_test - : +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + : +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + : +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv +- SubqueryAlias t2 - +- SubqueryAlias spark_catalog.default.integration_test2 - +- Relation spark_catalog.default.integration_test2[c1#x,c3#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test2 + +- Relation spark_catalog.identifier_clause_test_schema.integration_test2[c1#x,c3#x] csv -- !query @@ -1762,8 +1763,8 @@ Project [c1#x, c2#x, rn#x] +- Project [c1#x, c2#x, rn#x, rn#x] +- Window [row_number() windowspecdefinition(c2#x, c1#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#x], [c2#x], [c1#x ASC NULLS FIRST] +- Project [c1#x, c2#x] - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1771,8 +1772,8 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query analysis Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] -+- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv ++- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1781,16 +1782,16 @@ EXECUTE IMMEDIATE 'SELECT * 
FROM integration_test ORDER BY IDENTIFIER(:col1) DES -- !query analysis Sort [c1#x DESC NULLS LAST, c2#x ASC NULLS FIRST], true +- Project [c1#x, c2#x] - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:col2)) VALUES (:val1, :val2)' USING 'c1' AS col1, 'c2' AS col2, 3 AS val1, 'c' AS val2 -- !query analysis -CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] - +- InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/integration_test], Append, `spark_catalog`.`default`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/integration_test), [c1, c2] +CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test), [c1, c2] + +- InsertIntoHadoopFsRelationCommand 
file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/integration_test), [c1, c2] +- Project [c1#x AS c1#x, c2#x AS c2#x] +- Project [col1#x AS c1#x, col2#x AS c2#x] +- LocalRelation [col1#x, col2#x] @@ -1798,14 +1799,14 @@ CommandResult Execute InsertIntoHadoopFsRelationCommand file:[not included in co -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias + USING 'identifier_clause_test_schema' AS schema, 'my_table' AS table, 't' AS alias -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "default.my_table", + "identifier" : "identifier_clause_test_schema.my_table", "limit" : "1" }, "queryContext" : [ { @@ -1863,7 +1864,7 @@ EXECUTE IMMEDIATE 'ALTER TABLE IDENTIFIER(:tab) ADD COLUMN IDENTIFIER(:new_col) USING 'integration_test' AS tab, 'c4' AS new_col -- !query analysis CommandResult Execute AlterTableAddColumnsCommand - +- AlterTableAddColumnsCommand `spark_catalog`.`default`.`integration_test`, [StructField(c4,IntegerType,true)] + +- AlterTableAddColumnsCommand `spark_catalog`.`identifier_clause_test_schema`.`integration_test`, [StructField(c4,IntegerType,true)] -- !query @@ -1876,7 +1877,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "0A000", "messageParameters" : { "operation" : "RENAME COLUMN", - "tableName" : 
"`spark_catalog`.`default`.`integration_test`" + "tableName" : "`spark_catalog`.`identifier_clause_test_schema`.`integration_test`" } } @@ -1914,35 +1915,36 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab + USING 'c1' AS col1, 'c' AS p, 'identifier_clause_test_schema' AS schema, 'integration_test' AS tab -- !query analysis Sort [c1#x ASC NULLS FIRST], true +- Project [c1#x, c2#x] +- Filter (c1#x > 0) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x,c4#x] csv + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x,c4#x] csv -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query analysis -Project [c1#x, c2#x, c4#x] -+- Filter (c1#x > 0) - +- SubqueryAlias spark_catalog.default.integration_test - +- Relation spark_catalog.default.integration_test[c1#x,c2#x,c4#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST, c4#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x, c4#x] + +- Filter (c1#x > 0) + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x,c4#x] csv -- !query EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col + USING 'identifier_clause_test_schema' AS schema, 'col1' AS 
col -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "default.col1", + "identifier" : "identifier_clause_test_schema.col1", "limit" : "1" }, "queryContext" : [ { @@ -1959,11 +1961,18 @@ org.apache.spark.sql.catalyst.parser.ParseException DROP TABLE integration_test -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test -- !query DROP TABLE integration_test2 -- !query analysis DropTable false, false -+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.integration_test2 ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test2 + + +-- !query +DROP SCHEMA identifier_clause_test_schema +-- !query analysis +DropNamespace false, false ++- ResolvedNamespace V2SessionCatalog(spark_catalog), [identifier_clause_test_schema] diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index 5059c7faf407..f6544aefc18a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -194,8 +194,10 @@ SELECT * FROM VALUES (1, 2) AS IDENTIFIER('schema.table')(c1, c2); SELECT 1 AS IDENTIFIER('col1.col2'); -- Additional coverage: SHOW commands with identifier-lite +CREATE SCHEMA identifier_clause_test_schema; +USE identifier_clause_test_schema; CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV; -SHOW VIEWS IN IDENTIFIER('default'); +SHOW VIEWS IN IDENTIFIER('identifier_clause_test_schema'); SHOW PARTITIONS IDENTIFIER('test_show'); SHOW CREATE TABLE IDENTIFIER('test_show'); DROP TABLE test_show; @@ -218,14 +220,12 @@ ALTER TABLE test_comment ALTER 
COLUMN IDENTIFIER('c1') COMMENT 'column comment'; DROP TABLE test_comment; -- Additional identifier tests with qualified table names in various commands -CREATE SCHEMA identifier_clause_test_schema; CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV; ANALYZE TABLE IDENTIFIER('identifier_clause_test_schema.test_table') COMPUTE STATISTICS; REFRESH TABLE IDENTIFIER('identifier_clause_test_schema.test_table'); DESCRIBE IDENTIFIER('identifier_clause_test_schema.test_table'); SHOW COLUMNS FROM IDENTIFIER('identifier_clause_test_schema.test_table'); DROP TABLE IDENTIFIER('identifier_clause_test_schema.test_table'); -DROP SCHEMA identifier_clause_test_schema; -- Session variables with identifier-lite DECLARE IDENTIFIER('my_var') = 'value'; @@ -262,8 +262,8 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1 ''.c2'') FROM VALUES(named_struct(''c -- Test 3: IDENTIFIER with parameter and string literal coalescing for qualified table name CREATE TABLE integration_test(c1 INT, c2 STRING) USING CSV; INSERT INTO integration_test VALUES (1, 'a'), (2, 'b'); -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table; +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table; -- Test 4: IDENTIFIER in column reference with parameter and string coalescing EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' @@ -298,7 +298,7 @@ EXECUTE IMMEDIATE 'INSERT INTO integration_test(IDENTIFIER(:col1), IDENTIFIER(:c -- Test 11: Complex - IDENTIFIER with nested string operations EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias; + USING 'identifier_clause_test_schema' AS schema, 
'my_table' AS table, 't' AS alias; -- Test 12: IDENTIFIER in CTE name with parameter EXECUTE IMMEDIATE 'WITH IDENTIFIER(:cte_name)(c1) AS (VALUES(1)) SELECT c1 FROM IDENTIFIER(:cte_name)' @@ -328,18 +328,19 @@ EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS ID -- Test 17: Multiple IDENTIFIER clauses with different parameter combinations EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab; + USING 'c1' AS col1, 'c' AS p, 'identifier_clause_test_schema' AS schema, 'integration_test' AS tab; -- Test 19: IDENTIFIER with qualified name coalescing for schema.table.column pattern -- This should work for multi-part identifiers EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias; + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias; -- Test 20: Error case - IDENTIFIER with too many parts from parameter coalescing -- This should error as column alias must be single identifier EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col; + USING 'identifier_clause_test_schema' AS schema, 'col1' AS col; -- Cleanup DROP TABLE integration_test; DROP TABLE integration_test2; +DROP SCHEMA identifier_clause_test_schema; diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index e50b3b3a3840..de0b3e9a788c 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ 
-1682,6 +1682,22 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE SCHEMA identifier_clause_test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +USE identifier_clause_test_schema +-- !query schema +struct<> +-- !query output + + + -- !query CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV -- !query schema @@ -1691,7 +1707,7 @@ struct<> -- !query -SHOW VIEWS IN IDENTIFIER('default') +SHOW VIEWS IN IDENTIFIER('identifier_clause_test_schema') -- !query schema struct -- !query output @@ -1708,7 +1724,7 @@ org.apache.spark.sql.AnalysisException "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", "sqlState" : "42601", "messageParameters" : { - "name" : "`spark_catalog`.`default`.`test_show`" + "name" : "`spark_catalog`.`identifier_clause_test_schema`.`test_show`" } } @@ -1718,7 +1734,7 @@ SHOW CREATE TABLE IDENTIFIER('test_show') -- !query schema struct -- !query output -CREATE TABLE spark_catalog.default.test_show ( +CREATE TABLE spark_catalog.identifier_clause_test_schema.test_show ( c1 INT, c2 STRING) USING CSV @@ -1757,14 +1773,14 @@ c1 int # Detailed Table Information Catalog spark_catalog -Database default +Database identifier_clause_test_schema Table test_desc Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider CSV -Location [not included in comparison]/{warehouse_dir}/test_desc +Location [not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/test_desc -- !query @@ -1776,14 +1792,14 @@ c1 int # Detailed Table Information Catalog spark_catalog -Database default +Database identifier_clause_test_schema Table test_desc Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider CSV -Location [not included in comparison]/{warehouse_dir}/test_desc +Location [not included in 
comparison]/{warehouse_dir}/identifier_clause_test_schema.db/test_desc -- !query @@ -1842,14 +1858,6 @@ struct<> --- !query -CREATE SCHEMA identifier_clause_test_schema --- !query schema -struct<> --- !query output - - - -- !query CREATE TABLE identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query schema @@ -1898,14 +1906,6 @@ struct<> --- !query -DROP SCHEMA identifier_clause_test_schema --- !query schema -struct<> --- !query output - - - -- !query DECLARE IDENTIFIER('my_var') = 'value' -- !query schema @@ -1975,7 +1975,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "42883", "messageParameters" : { "routineName" : "`test_udf`", - "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" }, "queryContext" : [ { "objectType" : "", @@ -2099,8 +2099,8 @@ struct<> -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table -- !query schema struct -- !query output @@ -2224,7 +2224,7 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias + USING 'identifier_clause_test_schema' AS schema, 'my_table' AS table, 't' AS alias -- !query schema struct<> -- !query output @@ -2327,7 +2327,7 @@ org.apache.spark.sql.catalyst.analysis.NoSuchTableException "errorClass" : "TABLE_OR_VIEW_NOT_FOUND", "sqlState" : "42P01", "messageParameters" : { - "relationName" : "`spark_catalog`.`default`.`test_view`" + "relationName" : 
"`spark_catalog`.`identifier_clause_test_schema`.`test_view`" } } @@ -2431,7 +2431,7 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab + USING 'c1' AS col1, 'c' AS p, 'identifier_clause_test_schema' AS schema, 'integration_test' AS tab -- !query schema struct -- !query output @@ -2441,7 +2441,7 @@ struct -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query schema struct -- !query output @@ -2451,7 +2451,7 @@ struct -- !query EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col + USING 'identifier_clause_test_schema' AS schema, 'col1' AS col -- !query schema struct<> -- !query output @@ -2487,3 +2487,11 @@ DROP TABLE integration_test2 struct<> -- !query output + + +-- !query +DROP SCHEMA identifier_clause_test_schema +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 9303f60120e7..9410cce481cf 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -1554,6 +1554,22 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +CREATE SCHEMA identifier_clause_test_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +USE identifier_clause_test_schema +-- !query schema +struct<> +-- 
!query output + + + -- !query CREATE TABLE test_show(c1 INT, c2 STRING) USING CSV -- !query schema @@ -1563,11 +1579,11 @@ struct<> -- !query -SHOW VIEWS IN IDENTIFIER('default') +SHOW VIEWS IN IDENTIFIER('identifier_clause_test_schema') -- !query schema struct -- !query output -v + -- !query @@ -1580,7 +1596,7 @@ org.apache.spark.sql.AnalysisException "errorClass" : "INVALID_PARTITION_OPERATION.PARTITION_SCHEMA_IS_EMPTY", "sqlState" : "42601", "messageParameters" : { - "name" : "`spark_catalog`.`default`.`test_show`" + "name" : "`spark_catalog`.`identifier_clause_test_schema`.`test_show`" } } @@ -1590,7 +1606,7 @@ SHOW CREATE TABLE IDENTIFIER('test_show') -- !query schema struct -- !query output -CREATE TABLE spark_catalog.default.test_show ( +CREATE TABLE spark_catalog.identifier_clause_test_schema.test_show ( c1 INT, c2 STRING) USING CSV @@ -1629,14 +1645,14 @@ c1 int # Detailed Table Information Catalog spark_catalog -Database default +Database identifier_clause_test_schema Table test_desc Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider CSV -Location [not included in comparison]/{warehouse_dir}/test_desc +Location [not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/test_desc -- !query @@ -1648,14 +1664,14 @@ c1 int # Detailed Table Information Catalog spark_catalog -Database default +Database identifier_clause_test_schema Table test_desc Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider CSV -Location [not included in comparison]/{warehouse_dir}/test_desc +Location [not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/test_desc -- !query @@ -1706,14 +1722,6 @@ struct<> --- !query -CREATE SCHEMA identifier_clause_test_schema --- !query schema -struct<> --- !query output - - - -- !query CREATE TABLE 
identifier_clause_test_schema.test_table(c1 INT) USING CSV -- !query schema @@ -1762,14 +1770,6 @@ struct<> --- !query -DROP SCHEMA identifier_clause_test_schema --- !query schema -struct<> --- !query output - - - -- !query DECLARE IDENTIFIER('my_var') = 'value' -- !query schema @@ -1896,8 +1896,8 @@ struct<> -- !query -EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table)' - USING 'default' AS schema, 'integration_test' AS table +EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table -- !query schema struct -- !query output @@ -1992,7 +1992,7 @@ struct<> -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(concat(:schema, ''.'', :table, ''.c1'')) FROM VALUES(named_struct(''c1'', 100)) AS IDENTIFIER(:alias)(IDENTIFIER(:schema ''.'' :table))' - USING 'default' AS schema, 'my_table' AS table, 't' AS alias + USING 'identifier_clause_test_schema' AS schema, 'my_table' AS table, 't' AS alias -- !query schema struct<> -- !query output @@ -2001,7 +2001,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "default.my_table", + "identifier" : "identifier_clause_test_schema.my_table", "limit" : "1" }, "queryContext" : [ { @@ -2070,7 +2070,7 @@ org.apache.spark.sql.AnalysisException "sqlState" : "0A000", "messageParameters" : { "operation" : "RENAME COLUMN", - "tableName" : "`spark_catalog`.`default`.`integration_test`" + "tableName" : "`spark_catalog`.`identifier_clause_test_schema`.`integration_test`" } } @@ -2111,7 +2111,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:col1), IDENTIFIER(:p ''2'') FROM IDENTIFIER(:schema ''.'' :tab) WHERE IDENTIFIER(:col1) > 0 ORDER BY IDENTIFIER(:p ''1'')' - USING 'c1' AS col1, 'c' AS p, 'default' AS schema, 'integration_test' AS tab + USING 'c1' AS col1, 'c' 
AS p, 'identifier_clause_test_schema' AS schema, 'integration_test' AS tab -- !query schema struct -- !query output @@ -2122,7 +2122,7 @@ struct -- !query EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) WHERE IDENTIFIER(concat(:tab_alias, ''.c1'')) > 0 ORDER BY ALL' - USING 'default' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias + USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table, 'integration_test' AS tab_alias -- !query schema struct -- !query output @@ -2133,7 +2133,7 @@ struct -- !query EXECUTE IMMEDIATE 'SELECT 1 AS IDENTIFIER(:schema ''.'' :col)' - USING 'default' AS schema, 'col1' AS col + USING 'identifier_clause_test_schema' AS schema, 'col1' AS col -- !query schema struct<> -- !query output @@ -2142,7 +2142,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "default.col1", + "identifier" : "identifier_clause_test_schema.col1", "limit" : "1" }, "queryContext" : [ { @@ -2169,3 +2169,11 @@ DROP TABLE integration_test2 struct<> -- !query output + + +-- !query +DROP SCHEMA identifier_clause_test_schema +-- !query schema +struct<> +-- !query output + From e24a0ed1b03c327fb85ff1cc280860e3b5b1872f Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 09:05:21 -0800 Subject: [PATCH 15/37] Simplify code, style fixes in error messages --- .../resources/error/error-conditions.json | 28 ++--- .../sql/catalyst/parser/SqlBaseParser.g4 | 2 +- .../catalyst/parser/DataTypeAstBuilder.scala | 63 ++++------ .../sql/catalyst/parser/AstBuilder.scala | 52 +-------- .../sql/catalyst/parser/ParserUtils.scala | 108 +----------------- .../identifier-clause-legacy.sql.out | 9 +- .../identifier-clause.sql.out | 19 +-- .../sql-tests/inputs/identifier-clause.sql | 2 +- .../results/identifier-clause-legacy.sql.out | 2 +- .../results/identifier-clause.sql.out | 12 +- 10 files changed, 64 
insertions(+), 233 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 220d2aa8104c..f95a9ba74c5d 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -1121,7 +1121,7 @@ }, "HASH_VARIANT_TYPE" : { "message" : [ - "Input to the function cannot contain elements of the \"VARIANT\" type yet." + "Input to the function cannot contain elements of the \"VARIANT\" type." ] }, "INPUT_SIZE_NOT_ONE" : { @@ -4822,7 +4822,7 @@ }, "PARQUET_TYPE_NOT_SUPPORTED" : { "message" : [ - "Parquet type not yet supported: ." + "Parquet type not supported: ." ], "sqlState" : "42846" }, @@ -4996,7 +4996,7 @@ }, "PROTOBUF_TYPE_NOT_SUPPORT" : { "message" : [ - "Protobuf type not yet supported: ." + "Protobuf type not supported: ." ], "sqlState" : "42K0G" }, @@ -5750,7 +5750,7 @@ }, "TABLE_VALUED_ARGUMENTS_NOT_YET_IMPLEMENTED_FOR_SQL_FUNCTIONS" : { "message" : [ - "Cannot SQL user-defined function with TABLE arguments because this functionality is not yet implemented." + "Cannot SQL user-defined function with TABLE arguments because this functionality is not supported." ], "sqlState" : "0A000" }, @@ -5963,7 +5963,7 @@ }, "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE" : { "message" : [ - "The UNION operator is not yet supported within recursive common table expressions (WITH clauses that refer to themselves, directly or indirectly). Please use UNION ALL instead." + "The UNION operator is not supported within recursive common table expressions (WITH clauses that refer to themselves, directly or indirectly). Please use UNION ALL instead." ], "sqlState" : "42836" }, @@ -6466,7 +6466,7 @@ }, "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : { "message" : [ - "Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet." + "Referencing a lateral column alias via GROUP BY alias/ALL is not supported." 
] }, "LATERAL_COLUMN_ALIAS_IN_WINDOW" : { @@ -6871,7 +6871,7 @@ }, "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE" : { "message" : [ - "The single-pass analyzer cannot process this query or command because it does not yet support ." + "The single-pass analyzer cannot process this query or command because it does not support ." ], "sqlState" : "0A000" }, @@ -7409,7 +7409,7 @@ }, "_LEGACY_ERROR_TEMP_1018" : { "message" : [ - " is a permanent view, which is not supported by streaming reading API such as `DataStreamReader.table` yet." + " is a permanent view, which is not supported by streaming reading API such as `DataStreamReader.table`." ] }, "_LEGACY_ERROR_TEMP_1021" : { @@ -7419,7 +7419,7 @@ }, "_LEGACY_ERROR_TEMP_1030" : { "message" : [ - "Window aggregate function with filter predicate is not supported yet." + "Window aggregate function with filter predicate is not supported." ] }, "_LEGACY_ERROR_TEMP_1031" : { @@ -7530,7 +7530,7 @@ }, "_LEGACY_ERROR_TEMP_1071" : { "message" : [ - "Some existing schema fields () are not present in the new schema. We don't support dropping columns yet." + "Some existing schema fields () are not present in the new schema. We don't support dropping columns." ] }, "_LEGACY_ERROR_TEMP_1072" : { @@ -8294,7 +8294,7 @@ }, "_LEGACY_ERROR_TEMP_2030" : { "message" : [ - "Can not handle nested schema yet... plan ." + "Can not handle nested schema... plan ." ] }, "_LEGACY_ERROR_TEMP_2031" : { @@ -8349,7 +8349,7 @@ }, "_LEGACY_ERROR_TEMP_2041" : { "message" : [ - " is not implemented." + " is not supported." ] }, "_LEGACY_ERROR_TEMP_2045" : { @@ -8965,7 +8965,7 @@ }, "_LEGACY_ERROR_TEMP_2237" : { "message" : [ - ".getParentLogger is not yet implemented." + ".getParentLogger is not supported." ] }, "_LEGACY_ERROR_TEMP_2241" : { @@ -9486,7 +9486,7 @@ }, "_LEGACY_ERROR_TEMP_3082" : { "message" : [ - "Creating bucketed Hive serde table is not supported yet." + "Creating bucketed Hive serde table is not supported." 
] }, "_LEGACY_ERROR_TEMP_3083" : { diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index b302439ef6fb..c63e18d12e1a 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1726,7 +1726,7 @@ singleStringLitWithoutMarker singleStringLit : singleStringLitWithoutMarker | parameterMarker -; + ; parameterMarker : {parameter_substitution_enabled}? namedParameterMarker #namedParameterMarkerRule diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 171d1c021754..f5f2399a6222 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin} import org.apache.spark.sql.connector.catalog.IdentityColumnSpec -import org.apache.spark.sql.errors.QueryParsingErrors +import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType} @@ -87,11 +87,25 @@ import org.apache.spark.sql.types.{ArrayType, 
BinaryType, BooleanType, ByteType, * @see * [[org.apache.spark.sql.catalyst.parser.AstBuilder]] for the full SQL statement parser */ -class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { +class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeErrorsBase { protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } + /** + * Public helper to extract identifier parts from a context. This is exposed as public to allow + * utility classes like ParserUtils to reuse the identifier resolution logic without duplicating + * code. + * + * @param ctx + * The parser context containing the identifier. + * @return + * Sequence of identifier parts. + */ + def extractIdentifierParts(ctx: ParserRuleContext): Seq[String] = { + getIdentifierParts(ctx) + } + override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { typedVisit[DataType](ctx.dataType) } @@ -239,51 +253,14 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { protected def getIdentifierText(ctx: ParserRuleContext): String = { val parts = getIdentifierParts(ctx) if (parts.size > 1) { - // Try to find the original IDENTIFIER('literal') context for better error messages - val literalValue = extractIdentifierLiteral(ctx) - if (literalValue.isDefined) { - throw new ParseException( - errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", - messageParameters = Map("identifier" -> literalValue.get, "limit" -> "1"), - ctx) - } else { - // Regular qualified identifier without IDENTIFIER() - throw new IllegalStateException( - s"Expected single identifier but got qualified name: ${parts.mkString(".")}") - } + throw new ParseException( + errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", + messageParameters = Map("identifier" -> toSQLId(parts), "limit" -> "1"), + ctx) } parts.head } - /** - * Extract the string literal value from IDENTIFIER('literal') if present in the context tree. 
- * Returns None if this is not an IDENTIFIER('literal') construct. - */ - private def extractIdentifierLiteral(ctx: ParserRuleContext): Option[String] = { - ctx match { - case idLitCtx: IdentifierLiteralContext => - Some(string(visitStringLit(idLitCtx.stringLit()))) - case idLitCtx: IdentifierLiteralWithExtraContext => - Some(string(visitStringLit(idLitCtx.stringLit()))) - case idCtx: IdentifierContext => - // Recurse into strictIdentifier - if (idCtx.strictIdentifier() != null) { - extractIdentifierLiteral(idCtx.strictIdentifier()) - } else { - None - } - case base: ErrorCapturingIdentifierBaseContext => - // Recurse into identifier - if (base.identifier() != null && base.identifier().strictIdentifier() != null) { - extractIdentifierLiteral(base.identifier().strictIdentifier()) - } else { - None - } - case _ => - None - } - } - /** * Create a multi-part identifier. Handles identifier-lite with qualified identifiers like * IDENTIFIER('`cat`.`schema`').table diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d6227a4f3bb3..7104da78d8e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2604,31 +2604,7 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitTableIdentifier( ctx: TableIdentifierContext): TableIdentifier = withOrigin(ctx) { - // Get the table parts (may be multiple if using qualified identifier-lite) - // Handle null case for error recovery - val tableParts = if (ctx.table != null) { - getIdentifierParts(ctx.table) - } else { - Seq("") - } - - // Get the database parts if present - val dbParts = Option(ctx.db).map { db => - getIdentifierParts(db) - } - - // Combine db and table parts - val allParts = dbParts.getOrElse(Seq.empty) ++ tableParts - - // TableIdentifier expects 
(table, database) where database is optional - // If we have multiple parts, the last is the table, everything before is the database path - allParts match { - case Seq(table) => TableIdentifier(table, None) - case parts if parts.size >= 2 => - TableIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) - case _ => - throw new IllegalStateException(s"Invalid table identifier: ${ctx.getText}") - } + TableIdentifier(getIdentifierText(ctx.table), Option(ctx.db).map(getIdentifierText)) } /** @@ -2637,31 +2613,7 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitFunctionIdentifier( ctx: FunctionIdentifierContext): FunctionIdentifier = withOrigin(ctx) { - // Get the function parts (may be multiple if using qualified identifier-lite) - // Handle null case for error recovery - val functionParts = if (ctx.function != null) { - getIdentifierParts(ctx.function) - } else { - Seq("") - } - - // Get the database parts if present - val dbParts = Option(ctx.db).map { db => - getIdentifierParts(db) - } - - // Combine db and function parts - val allParts = dbParts.getOrElse(Seq.empty) ++ functionParts - - // FunctionIdentifier expects (function, database) where database is optional - // If we have multiple parts, the last is the function, everything before is the database path - allParts match { - case Seq(function) => FunctionIdentifier(function, None) - case parts if parts.size >= 2 => - FunctionIdentifier(parts.last, Some(parts.dropRight(1).mkString("."))) - case _ => - throw new IllegalStateException(s"Invalid function identifier: ${ctx.getText}") - } + FunctionIdentifier(getIdentifierText(ctx.function), Option(ctx.db).map(getIdentifierText)) } /* ******************************************************************************************** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index fdaefd95ba57..b9ad20d4d2a9 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -52,119 +52,19 @@ object ParserUtils extends SparkParserUtils { * Gets the resolved text of a multipart identifier, handling IDENTIFIER('literal') syntax. * This method properly traverses the parse tree structure to extract identifier literals, * making it robust to comments, whitespace, and string coalescing. - * Uses the same pattern-matching approach as DataTypeAstBuilder.getIdentifierParts. + * Uses DataTypeAstBuilder.extractIdentifierParts to reuse existing logic. * * @param ctx The multipart identifier context from the parse tree. * @return The resolved identifier text as a dot-separated string. */ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { + // Use DataTypeAstBuilder to properly extract identifier parts. + val astBuilder = new DataTypeAstBuilder() ctx.parts.asScala.flatMap { part => - getErrorCapturingIdentifierParts(part) + astBuilder.extractIdentifierParts(part) }.mkString(".") } - /** - * Extract identifier parts from an ErrorCapturingIdentifierContext. - * Mirrors the logic in DataTypeAstBuilder.getIdentifierParts but adapted for use - * in ParserUtils where we don't have access to the full AstBuilder infrastructure. - */ - private def getErrorCapturingIdentifierParts( - ctx: SqlBaseParser.ErrorCapturingIdentifierContext): Seq[String] = { - - ctx match { - case base: SqlBaseParser.ErrorCapturingIdentifierBaseContext => - // Regular identifier with errorCapturingIdentifierExtra. - val identifier = base.identifier() - if (identifier != null && identifier.strictIdentifier() != null) { - getStrictIdentifierParts(identifier.strictIdentifier()) - } else { - Seq(ctx.getText) - } - case idLit: SqlBaseParser.IdentifierLiteralWithExtraContext => - // IDENTIFIER('literal') in errorCapturingIdentifier. 
- val literalValue = extractStringLiteralValue(idLit.stringLit()) - // Parse the literal as a multipart identifier. - try { - CatalystSqlParser.parseMultipartIdentifier(literalValue) - } catch { - case _: ParseException => Seq(literalValue) - } - case _ => - Seq(ctx.getText) - } - } - - /** - * Extract identifier parts from a StrictIdentifierContext. - * Mirrors DataTypeAstBuilder logic for strictIdentifier contexts. - */ - private def getStrictIdentifierParts( - ctx: SqlBaseParser.StrictIdentifierContext): Seq[String] = { - ctx match { - case idLit: SqlBaseParser.IdentifierLiteralContext => - // IDENTIFIER('literal') in strictIdentifier. - val literalValue = extractStringLiteralValue(idLit.stringLit()) - try { - CatalystSqlParser.parseMultipartIdentifier(literalValue) - } catch { - case _: ParseException => Seq(literalValue) - } - case _ => - // Regular identifier (unquoted, quoted, or keyword). - Seq(ctx.getText) - } - } - - /** - * Extract the string value from a StringLitContext. - * This properly handles string coalescing ('a' 'b' -> 'ab'), escaping, and whitespace/comments. - * Mirrors the string extraction logic used in DataTypeAstBuilder. - */ - private def extractStringLiteralValue(ctx: SqlBaseParser.StringLitContext): String = { - if (ctx == null) { - return "" - } - - // Extract all string literal tokens from the parse tree. - val tokens = ctx.singleStringLit().asScala.flatMap { singleStr => - val childCount = singleStr.getChildCount - if (childCount > 0) { - val child = singleStr.getChild(0) - child match { - case terminal: org.antlr.v4.runtime.tree.TerminalNode => - Some(terminal.getSymbol) - case _ => None - } - } else { - None - } - } - - if (tokens.isEmpty) { - // Fallback: extract via getText if token extraction failed. 
- val text = ctx.getText - if (text.startsWith("'") && text.endsWith("'")) { - return text.substring(1, text.length - 1).replace("''", "'") - } else if (text.startsWith("\"") && text.endsWith("\"")) { - return text.substring(1, text.length - 1).replace("\"\"", "\"") - } else { - return text - } - } - - // Coalesce multiple string literals and unescape. - tokens.map { token => - val text = token.getText - if (text.startsWith("'") && text.endsWith("'")) { - text.substring(1, text.length - 1).replace("''", "'") - } else if (text.startsWith("\"") && text.endsWith("\"")) { - text.substring(1, text.length - 1).replace("\"\"", "\"") - } else { - text - } - }.mkString("") - } - def checkDuplicateClauses[T]( nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = { if (nodes.size() > 1) { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index 06836a0a9b04..94b1bc05c455 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -1857,12 +1857,13 @@ Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test ORDER BY ALL' USING 'c' AS prefix -- !query analysis -Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test - +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation 
spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 40586eeadfc9..20f1c6811d50 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1072,7 +1072,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "x.win", + "identifier" : "`x`.`win`", "limit" : "1" }, "queryContext" : [ { @@ -1396,7 +1396,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "schema.table", + "identifier" : "`schema`.`table`", "limit" : "1" }, "queryContext" : [ { @@ -1417,7 +1417,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "col1.col2", + "identifier" : "`col1`.`col2`", "limit" : "1" }, "queryContext" : [ { @@ -1707,12 +1707,13 @@ Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test ORDER BY ALL' USING 'c' AS prefix -- !query analysis -Project [c1#x, c2#x] -+- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test - +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- 
Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1806,7 +1807,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "identifier_clause_test_schema.my_table", + "identifier" : "`identifier_clause_test_schema`.`my_table`", "limit" : "1" }, "queryContext" : [ { @@ -1944,7 +1945,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "identifier_clause_test_schema.col1", + "identifier" : "`identifier_clause_test_schema`.`col1`", "limit" : "1" }, "queryContext" : [ { diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index f6544aefc18a..f57a498d4795 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -266,7 +266,7 @@ EXECUTE IMMEDIATE 'SELECT * FROM IDENTIFIER(:schema ''.'' :table) ORDER BY ALL' USING 'identifier_clause_test_schema' AS schema, 'integration_test' AS table; -- Test 4: IDENTIFIER in column reference with parameter and string coalescing -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test ORDER BY ALL' USING 'c' AS prefix; -- Test 5: IDENTIFIER in WHERE clause with parameters diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index de0b3e9a788c..cf48f4d2aa5d 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -2109,7 +2109,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test ORDER BY ALL' USING 'c' AS prefix -- !query schema struct diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 9410cce481cf..36ecac90fb16 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -1183,7 +1183,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "x.win", + "identifier" : "`x`.`win`", "limit" : "1" }, "queryContext" : [ { @@ -1518,7 +1518,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "schema.table", + "identifier" : "`schema`.`table`", "limit" : "1" }, "queryContext" : [ { @@ -1541,7 +1541,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "col1.col2", + "identifier" : "`col1`.`col2`", "limit" : "1" }, "queryContext" : [ { @@ -1906,7 +1906,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''1''), IDENTIFIER(:prefix ''2'') FROM integration_test ORDER BY ALL' USING 'c' AS prefix -- !query schema struct @@ -2001,7 +2001,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", 
"messageParameters" : { - "identifier" : "identifier_clause_test_schema.my_table", + "identifier" : "`identifier_clause_test_schema`.`my_table`", "limit" : "1" }, "queryContext" : [ { @@ -2142,7 +2142,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "errorClass" : "IDENTIFIER_TOO_MANY_NAME_PARTS", "sqlState" : "42601", "messageParameters" : { - "identifier" : "identifier_clause_test_schema.col1", + "identifier" : "`identifier_clause_test_schema`.`col1`", "limit" : "1" }, "queryContext" : [ { From 9b2454aedf6171b20b45ba33f50f3e86b13eb1bd Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 10:59:18 -0800 Subject: [PATCH 16/37] Fix testcases --- .../identifier-clause-legacy.sql.out | 21 ++++++------ .../identifier-clause.sql.out | 32 ++++++++++--------- .../sql-tests/inputs/identifier-clause.sql | 6 ++-- .../results/identifier-clause-legacy.sql.out | 14 ++++---- .../results/identifier-clause.sql.out | 6 ++-- .../sql/hive/execution/HiveDDLSuite.scala | 2 +- .../sql/hive/execution/HiveSerDeSuite.scala | 2 +- 7 files changed, 43 insertions(+), 40 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index 94b1bc05c455..3437054a1fbf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -1891,7 +1891,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query -EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL' USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col -- !query analysis 
org.apache.spark.sql.catalyst.parser.ParseException @@ -1906,8 +1906,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "objectType" : "EXECUTE IMMEDIATE", "objectName" : "", "startIndex" : 1, - "stopIndex" : 90, - "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))" + "stopIndex" : 103, + "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL" } ] } @@ -1926,12 +1926,13 @@ Project [c1#x, c2#x, rn#x] -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'') ORDER BY ALL' USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query analysis -Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] -+- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test - +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv +Sort [c2#x ASC NULLS FIRST, count(c1)#xL ASC NULLS FIRST], true ++- Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -2133,7 +2134,7 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL' USING 't' AS alias -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException @@ -2148,8 +2149,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "objectType" : "EXECUTE IMMEDIATE", "objectName" : "", "startIndex" : 1, - "stopIndex" : 75, - 
"fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias)" + "stopIndex" : 88, + "fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL" } ] } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 20f1c6811d50..dbe8c6269740 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1741,18 +1741,19 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query -EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL' USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col -- !query analysis -Project [c1#x, c2#x, c1#x, c3#x] -+- Project [c1#x, c2#x, c3#x, c1#x] - +- Join Inner, (c1#x = c1#x) - :- SubqueryAlias t1 - : +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test - : +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv - +- SubqueryAlias t2 - +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test2 - +- Relation spark_catalog.identifier_clause_test_schema.integration_test2[c1#x,c3#x] csv +Sort [c1#x ASC NULLS FIRST, c2#x ASC NULLS FIRST, c1#x ASC NULLS FIRST, c3#x ASC NULLS FIRST], true ++- Project [c1#x, c2#x, c1#x, c3#x] + +- Project [c1#x, c2#x, c3#x, c1#x] + +- Join Inner, (c1#x = c1#x) + :- SubqueryAlias t1 + : +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + : +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv + +- SubqueryAlias t2 + +- SubqueryAlias 
spark_catalog.identifier_clause_test_schema.integration_test2 + +- Relation spark_catalog.identifier_clause_test_schema.integration_test2[c1#x,c3#x] csv -- !query @@ -1769,12 +1770,13 @@ Project [c1#x, c2#x, rn#x] -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'') ORDER BY ALL' USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query analysis -Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] -+- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test - +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv +Sort [c2#x ASC NULLS FIRST, count(c1)#xL ASC NULLS FIRST], true ++- Aggregate [c2#x], [c2#x, count(c1#x) AS count(c1)#xL] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.integration_test + +- Relation spark_catalog.identifier_clause_test_schema.integration_test[c1#x,c2#x] csv -- !query @@ -1892,7 +1894,7 @@ Project [map(mykey, 42)[mykey] AS result#x] -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL' USING 't' AS alias -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index f57a498d4795..b2470b23f422 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -276,7 +276,7 @@ EXECUTE IMMEDIATE 'SELECT * FROM integration_test WHERE IDENTIFIER(:col) = :val' -- Test 6: IDENTIFIER in JOIN with parameters for table 
and column names CREATE TABLE integration_test2(c1 INT, c3 STRING) USING CSV; INSERT INTO integration_test2 VALUES (1, 'x'), (2, 'y'); -EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL' USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col; -- Test 7: IDENTIFIER in window function with parameter for partition column @@ -285,7 +285,7 @@ EXECUTE IMMEDIATE USING 'c1' AS col1, 'c2' AS col2, 'c2' AS part, 'c1' AS ord; -- Test 8: IDENTIFIER in aggregate function with string coalescing -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'') ORDER BY ALL' USING 'c' AS prefix, 'count' AS agg, 'c1' AS col; -- Test 9: IDENTIFIER in ORDER BY with multiple parameters @@ -322,7 +322,7 @@ EXECUTE IMMEDIATE 'SELECT map(:key, :val).IDENTIFIER(:key) AS result' USING 'mykey' AS key, 42 AS val; -- Test 16: IDENTIFIER in table alias with string coalescing -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL' USING 't' AS alias; -- Test 17: Multiple IDENTIFIER clauses with different parameter combinations diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index cf48f4d2aa5d..e5ec83099ffd 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -2144,7 +2144,7 
@@ struct<> -- !query -EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL' USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col -- !query schema struct<> @@ -2161,8 +2161,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "objectType" : "EXECUTE IMMEDIATE", "objectName" : "", "startIndex" : 1, - "stopIndex" : 90, - "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))" + "stopIndex" : 103, + "fragment" : "SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL" } ] } @@ -2179,7 +2179,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'') ORDER BY ALL' USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query schema struct @@ -2405,7 +2405,7 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL' USING 't' AS alias -- !query schema struct<> @@ -2422,8 +2422,8 @@ org.apache.spark.sql.catalyst.parser.ParseException "objectType" : "EXECUTE IMMEDIATE", "objectName" : "", "startIndex" : 1, - "stopIndex" : 75, - "fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias)" + "stopIndex" : 88, + "fragment" : "SELECT IDENTIFIER(:alias '.c1') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL" } ] } diff --git 
a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 36ecac90fb16..e9fe5fe37c96 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -1941,7 +1941,7 @@ struct<> -- !query -EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col))' +EXECUTE IMMEDIATE 'SELECT t1.*, t2.* FROM IDENTIFIER(:t1) t1 JOIN IDENTIFIER(:t2) t2 USING (IDENTIFIER(:col)) ORDER BY ALL' USING 'integration_test' AS t1, 'integration_test2' AS t2, 'c1' AS col -- !query schema struct @@ -1962,7 +1962,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'')' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:prefix ''2''), IDENTIFIER(:agg)(IDENTIFIER(:col)) FROM integration_test GROUP BY IDENTIFIER(:prefix ''2'') ORDER BY ALL' USING 'c' AS prefix, 'count' AS agg, 'c1' AS col -- !query schema struct @@ -2085,7 +2085,7 @@ struct -- !query -EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias)' +EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL' USING 't' AS alias -- !query schema struct<> diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index caa4ca4581b4..f5c787944bd0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2000,7 +2000,7 @@ class HiveDDLSuite val e2 = intercept[AnalysisException] { Seq(1 -> "a").toDF("i", "j").write.format("hive").bucketBy(4, "i").saveAsTable("t1") } - 
assert(e2.message.contains("Creating bucketed Hive serde table is not supported yet")) + assert(e2.message.contains("Creating bucketed Hive serde table is not supported")) checkError( exception = intercept[AnalysisException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index aac601043f33..5a21dcc2aaf1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -170,7 +170,7 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte val v2 = "CREATE TABLE t (c1 int, c2 int) USING hive CLUSTERED BY (c2) INTO 4 BUCKETS" val e2 = intercept[AnalysisException](analyzeCreateTable(v2)) - assert(e2.message.contains("Creating bucketed Hive serde table is not supported yet")) + assert(e2.message.contains("Creating bucketed Hive serde table is not supported")) val v3 = """ From 608e1eae015d51b0d3454be2ce64cea04fe8c2b4 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 13:14:51 -0800 Subject: [PATCH 17/37] Introduce simpleIdentifier --- .../resources/error/error-conditions.json | 28 ++++---- .../sql/catalyst/parser/SqlBaseParser.g4 | 28 ++++++-- .../catalyst/parser/DataTypeAstBuilder.scala | 40 ++++++------ .../parser/SubstituteParmsAstBuilder.scala | 2 + .../sql/catalyst/parser/AstBuilder.scala | 64 ++++++++++--------- .../sql/hive/execution/HiveDDLSuite.scala | 2 +- .../sql/hive/execution/HiveSerDeSuite.scala | 2 +- 7 files changed, 94 insertions(+), 72 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index f95a9ba74c5d..220d2aa8104c 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -1121,7 
+1121,7 @@ }, "HASH_VARIANT_TYPE" : { "message" : [ - "Input to the function cannot contain elements of the \"VARIANT\" type." + "Input to the function cannot contain elements of the \"VARIANT\" type yet." ] }, "INPUT_SIZE_NOT_ONE" : { @@ -4822,7 +4822,7 @@ }, "PARQUET_TYPE_NOT_SUPPORTED" : { "message" : [ - "Parquet type not supported: ." + "Parquet type not yet supported: ." ], "sqlState" : "42846" }, @@ -4996,7 +4996,7 @@ }, "PROTOBUF_TYPE_NOT_SUPPORT" : { "message" : [ - "Protobuf type not supported: ." + "Protobuf type not yet supported: ." ], "sqlState" : "42K0G" }, @@ -5750,7 +5750,7 @@ }, "TABLE_VALUED_ARGUMENTS_NOT_YET_IMPLEMENTED_FOR_SQL_FUNCTIONS" : { "message" : [ - "Cannot SQL user-defined function with TABLE arguments because this functionality is not supported." + "Cannot SQL user-defined function with TABLE arguments because this functionality is not yet implemented." ], "sqlState" : "0A000" }, @@ -5963,7 +5963,7 @@ }, "UNION_NOT_SUPPORTED_IN_RECURSIVE_CTE" : { "message" : [ - "The UNION operator is not supported within recursive common table expressions (WITH clauses that refer to themselves, directly or indirectly). Please use UNION ALL instead." + "The UNION operator is not yet supported within recursive common table expressions (WITH clauses that refer to themselves, directly or indirectly). Please use UNION ALL instead." ], "sqlState" : "42836" }, @@ -6466,7 +6466,7 @@ }, "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : { "message" : [ - "Referencing a lateral column alias via GROUP BY alias/ALL is not supported." + "Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet." ] }, "LATERAL_COLUMN_ALIAS_IN_WINDOW" : { @@ -6871,7 +6871,7 @@ }, "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE" : { "message" : [ - "The single-pass analyzer cannot process this query or command because it does not support ." + "The single-pass analyzer cannot process this query or command because it does not yet support ." 
], "sqlState" : "0A000" }, @@ -7409,7 +7409,7 @@ }, "_LEGACY_ERROR_TEMP_1018" : { "message" : [ - " is a permanent view, which is not supported by streaming reading API such as `DataStreamReader.table`." + " is a permanent view, which is not supported by streaming reading API such as `DataStreamReader.table` yet." ] }, "_LEGACY_ERROR_TEMP_1021" : { @@ -7419,7 +7419,7 @@ }, "_LEGACY_ERROR_TEMP_1030" : { "message" : [ - "Window aggregate function with filter predicate is not supported." + "Window aggregate function with filter predicate is not supported yet." ] }, "_LEGACY_ERROR_TEMP_1031" : { @@ -7530,7 +7530,7 @@ }, "_LEGACY_ERROR_TEMP_1071" : { "message" : [ - "Some existing schema fields () are not present in the new schema. We don't support dropping columns." + "Some existing schema fields () are not present in the new schema. We don't support dropping columns yet." ] }, "_LEGACY_ERROR_TEMP_1072" : { @@ -8294,7 +8294,7 @@ }, "_LEGACY_ERROR_TEMP_2030" : { "message" : [ - "Can not handle nested schema... plan ." + "Can not handle nested schema yet... plan ." ] }, "_LEGACY_ERROR_TEMP_2031" : { @@ -8349,7 +8349,7 @@ }, "_LEGACY_ERROR_TEMP_2041" : { "message" : [ - " is not supported." + " is not implemented." ] }, "_LEGACY_ERROR_TEMP_2045" : { @@ -8965,7 +8965,7 @@ }, "_LEGACY_ERROR_TEMP_2237" : { "message" : [ - ".getParentLogger is not supported." + ".getParentLogger is not yet implemented." ] }, "_LEGACY_ERROR_TEMP_2241" : { @@ -9486,7 +9486,7 @@ }, "_LEGACY_ERROR_TEMP_3082" : { "message" : [ - "Creating bucketed Hive serde table is not supported." + "Creating bucketed Hive serde table is not supported yet." 
] }, "_LEGACY_ERROR_TEMP_3083" : { diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index c63e18d12e1a..99c8b0328431 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -328,7 +328,7 @@ statement | SHOW VIEWS ((FROM | IN) identifierReference)? (LIKE? pattern=stringLit)? #showViews | SHOW PARTITIONS identifierReference partitionSpec? #showPartitions - | SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)? + | SHOW functionScope=simpleIdentifier? FUNCTIONS ((FROM | IN) ns=identifierReference)? (LIKE? (legacy=multipartIdentifier | pattern=stringLit))? #showFunctions | SHOW PROCEDURES ((FROM | IN) identifierReference)? #showProcedures | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable @@ -840,8 +840,8 @@ hint ; hintStatement - : hintName=identifier - | hintName=identifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN + : hintName=simpleIdentifier + | hintName=simpleIdentifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN ; fromClause @@ -1248,7 +1248,7 @@ primaryExpression | identifier #columnReference | base=primaryExpression DOT fieldName=identifier #dereference | LEFT_PAREN expression RIGHT_PAREN #parenthesizedExpression - | EXTRACT LEFT_PAREN field=identifier FROM source=valueExpression RIGHT_PAREN #extract + | EXTRACT LEFT_PAREN field=simpleIdentifier FROM source=valueExpression RIGHT_PAREN #extract | (SUBSTR | SUBSTRING) LEFT_PAREN str=valueExpression (FROM | COMMA) pos=valueExpression ((FOR | COMMA) len=valueExpression)? RIGHT_PAREN #substring | TRIM LEFT_PAREN trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)? 
@@ -1304,7 +1304,7 @@ constant ; namedParameterMarker - : COLON identifier + : COLON simpleIdentifier ; comparisonOperator : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ @@ -1607,6 +1607,16 @@ identifier | {!SQL_standard_keyword_behavior}? strictNonReserved ; +// simpleIdentifier: like identifier but without IDENTIFIER('literal') support +// Use this for contexts where IDENTIFIER() syntax is not appropriate: +// - Named parameters (:param_name) +// - Extract field names (EXTRACT(field FROM ...)) +// - Other keyword-like or string-like uses +simpleIdentifier + : simpleStrictIdentifier + | {!SQL_standard_keyword_behavior}? strictNonReserved + ; + strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative @@ -1615,6 +1625,14 @@ strictIdentifier | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier ; +// simpleStrictIdentifier: like strictIdentifier but without IDENTIFIER('literal') support +simpleStrictIdentifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier + | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier + ; + quotedIdentifier : BACKQUOTED_IDENTIFIER | {double_quoted_identifiers}? 
DOUBLEQUOTED_STRING diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index f5f2399a6222..04434b91be82 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -211,35 +211,33 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { ctx match { case idCtx: IdentifierContext => - // identifier can be either strictIdentifier or strictNonReserved - // Recursively process the strictIdentifier - if (idCtx.strictIdentifier() != null) { - getIdentifierParts(idCtx.strictIdentifier()) - } else { - Seq(ctx.getText) - } + // identifier can be either strictIdentifier or strictNonReserved. + // Recursively process the strictIdentifier. + Option(idCtx.strictIdentifier()).map(getIdentifierParts).getOrElse(Seq(ctx.getText)) + case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal') in strictIdentifier + // For IDENTIFIER('literal') in strictIdentifier. val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // This base implementation just returns the literal as a single part - // Subclasses should override to parse qualified identifiers + // This base implementation just returns the literal as a single part. + // Subclasses should override to parse qualified identifiers. Seq(literalValue) + case idLitCtx: IdentifierLiteralWithExtraContext => - // For IDENTIFIER('literal') in errorCapturingIdentifier + // For IDENTIFIER('literal') in errorCapturingIdentifier. 
val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // This base implementation just returns the literal as a single part - // Subclasses should override to parse qualified identifiers + // This base implementation just returns the literal as a single part. + // Subclasses should override to parse qualified identifiers. Seq(literalValue) + case base: ErrorCapturingIdentifierBaseContext => - // Regular identifier with errorCapturingIdentifierExtra - // Need to recursively handle identifier which might itself be IDENTIFIER('literal') - if (base.identifier() != null && base.identifier().strictIdentifier() != null) { - getIdentifierParts(base.identifier().strictIdentifier()) - } else { - Seq(ctx.getText) - } + // Regular identifier with errorCapturingIdentifierExtra. + // Need to recursively handle identifier which might itself be IDENTIFIER('literal'). + Option(base.identifier()).flatMap(id => + Option(id.strictIdentifier()).map(getIdentifierParts) + ).getOrElse(Seq(ctx.getText)) + case _ => - // For regular identifiers, just return the text as a single part + // For regular identifiers, just return the text as a single part. Seq(ctx.getText) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala index 8beeb9b17d4c..cf0217ac840a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala @@ -81,6 +81,7 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { */ override def visitNamedParameterLiteral(ctx: NamedParameterLiteralContext): AnyRef = withOrigin(ctx) { + // Named parameters use simpleIdentifier, so .getText() is correct. 
val paramName = ctx.namedParameterMarker().identifier().getText namedParams += paramName @@ -117,6 +118,7 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { */ override def visitNamedParameterMarkerRule(ctx: NamedParameterMarkerRuleContext): AnyRef = withOrigin(ctx) { + // Named parameters use simpleIdentifier, so .getText() is correct. val paramName = ctx.namedParameterMarker().identifier().getText namedParams += paramName diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7104da78d8e0..47c8a352e17a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -125,37 +125,23 @@ class AstBuilder extends DataTypeAstBuilder */ override protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { ctx match { - case idCtx: IdentifierContext => - // identifier can be either strictIdentifier or strictNonReserved - // Recursively process the strictIdentifier - if (idCtx.strictIdentifier() != null) { - getIdentifierParts(idCtx.strictIdentifier()) - } else { - Seq(ctx.getText) - } case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal') in strictIdentifier + // For IDENTIFIER('literal') in strictIdentifier. val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string as a multi-part identifier + // Parse the string as a multi-part identifier. // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) CatalystSqlParser.parseMultipartIdentifier(literalValue) + case idLitCtx: IdentifierLiteralWithExtraContext => - // For IDENTIFIER('literal') in errorCapturingIdentifier + // For IDENTIFIER('literal') in errorCapturingIdentifier. 
val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string as a multi-part identifier + // Parse the string as a multi-part identifier. // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) CatalystSqlParser.parseMultipartIdentifier(literalValue) - case base: ErrorCapturingIdentifierBaseContext => - // Regular identifier with errorCapturingIdentifierExtra - // Need to recursively handle identifier which might itself be IDENTIFIER('literal') - if (base.identifier() != null && base.identifier().strictIdentifier() != null) { - getIdentifierParts(base.identifier().strictIdentifier()) - } else { - Seq(ctx.getText) - } + case _ => - // For regular identifiers, just return the text as a single part - Seq(ctx.getText) + // Delegate all other cases to the base implementation. + super.getIdentifierParts(ctx) } } @@ -1970,6 +1956,7 @@ class AstBuilder extends DataTypeAstBuilder query: LogicalPlan): LogicalPlan = withOrigin(ctx) { var plan = query ctx.hintStatements.asScala.reverse.foreach { stmt => + // Hint names use simpleIdentifier, so .getText() is correct. 
plan = UnresolvedHint(stmt.hintName.getText, stmt.parameters.asScala.map(expression).toSeq, plan) } @@ -2086,7 +2073,7 @@ class AstBuilder extends DataTypeAstBuilder override def visitUnpivotColumnAndAlias(ctx: UnpivotColumnAndAliasContext): (NamedExpression, Option[String]) = withOrigin(ctx) { val attr = visitUnpivotColumn(ctx.unpivotColumn()) - val alias = Option(ctx.unpivotAlias()).map(_.errorCapturingIdentifier().getText) + val alias = Option(ctx.unpivotAlias()).map(a => getIdentifierText(a.errorCapturingIdentifier())) (attr, alias) } @@ -2098,7 +2085,7 @@ class AstBuilder extends DataTypeAstBuilder (Seq[NamedExpression], Option[String]) = withOrigin(ctx) { val exprs = ctx.unpivotColumns.asScala.map(visitUnpivotColumn).toSeq - val alias = Option(ctx.unpivotAlias()).map(_.errorCapturingIdentifier().getText) + val alias = Option(ctx.unpivotAlias()).map(a => getIdentifierText(a.errorCapturingIdentifier())) (exprs, alias) } @@ -2114,9 +2101,9 @@ class AstBuilder extends DataTypeAstBuilder unrequiredChildIndex = Nil, outer = ctx.OUTER != null, // scalastyle:off caselocale - Some(ctx.tblName.getText.toLowerCase), + Some(getIdentifierText(ctx.tblName).toLowerCase), // scalastyle:on caselocale - ctx.colName.asScala.map(_.getText).map(UnresolvedAttribute.quoted).toSeq, + ctx.colName.asScala.map(getIdentifierText).map(UnresolvedAttribute.quoted).toSeq, query) } @@ -3021,8 +3008,11 @@ class AstBuilder extends DataTypeAstBuilder } } else { // If the parser is not in ansi mode, we should return `UnresolvedAttribute`, in case there - // are columns named `CURRENT_DATE` or `CURRENT_TIMESTAMP` or `CURRENT_TIME` + // are columns named `CURRENT_DATE` or `CURRENT_TIMESTAMP` or `CURRENT_TIME`. + // scalastyle:off parser.gettext + // ctx.name is a token, not an identifier context. 
UnresolvedAttribute.quoted(ctx.name.getText) + // scalastyle:on parser.gettext } } @@ -4176,7 +4166,7 @@ class AstBuilder extends DataTypeAstBuilder ctx: ColumnConstraintDefinitionContext): TableConstraint = { withOrigin(ctx) { val name = if (ctx.name != null) { - ctx.name.getText + getIdentifierText(ctx.name) } else { null } @@ -5646,7 +5636,7 @@ class AstBuilder extends DataTypeAstBuilder ctx: TableConstraintDefinitionContext): TableConstraint = withOrigin(ctx) { val name = if (ctx.name != null) { - ctx.name.getText + getIdentifierText(ctx.name) } else { null } @@ -5750,7 +5740,7 @@ class AstBuilder extends DataTypeAstBuilder ctx.identifierReference, "ALTER TABLE ... DROP CONSTRAINT") DropConstraint( table, - ctx.name.getText, + getIdentifierText(ctx.name), ifExists = ctx.EXISTS() != null, cascade = ctx.CASCADE() != null) } @@ -6360,6 +6350,7 @@ class AstBuilder extends DataTypeAstBuilder * Create a plan for a SHOW FUNCTIONS command. */ override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) { + // Function scope uses simpleIdentifier, so .getText() is correct. val (userScope, systemScope) = Option(ctx.identifier) .map(_.getText.toLowerCase(Locale.ROOT)) match { case None | Some("all") => (true, true) @@ -6492,12 +6483,18 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitTimestampadd(ctx: TimestampaddContext): Expression = withOrigin(ctx) { if (ctx.invalidUnit != null) { + // scalastyle:off parser.gettext + // ctx.name and ctx.invalidUnit are tokens, not identifier contexts. throw QueryParsingErrors.invalidDatetimeUnitError( ctx, ctx.name.getText, ctx.invalidUnit.getText) + // scalastyle:on parser.gettext } else { + // scalastyle:off parser.gettext + // ctx.unit is a token, not an identifier context. 
TimestampAdd(ctx.unit.getText, expression(ctx.unitsAmount), expression(ctx.timestamp)) + // scalastyle:on parser.gettext } } @@ -6506,12 +6503,18 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitTimestampdiff(ctx: TimestampdiffContext): Expression = withOrigin(ctx) { if (ctx.invalidUnit != null) { + // scalastyle:off parser.gettext + // ctx.name and ctx.invalidUnit are tokens, not identifier contexts. throw QueryParsingErrors.invalidDatetimeUnitError( ctx, ctx.name.getText, ctx.invalidUnit.getText) + // scalastyle:on parser.gettext } else { + // scalastyle:off parser.gettext + // ctx.unit is a token, not an identifier context. TimestampDiff(ctx.unit.getText, expression(ctx.startTimestamp), expression(ctx.endTimestamp)) + // scalastyle:on parser.gettext } } @@ -6520,6 +6523,7 @@ class AstBuilder extends DataTypeAstBuilder * */ override def visitNamedParameterLiteral( ctx: NamedParameterLiteralContext): Expression = withOrigin(ctx) { + // Named parameters use simpleIdentifier, so .getText() is correct. 
NamedParameter(ctx.namedParameterMarker().identifier().getText) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f5c787944bd0..caa4ca4581b4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2000,7 +2000,7 @@ class HiveDDLSuite val e2 = intercept[AnalysisException] { Seq(1 -> "a").toDF("i", "j").write.format("hive").bucketBy(4, "i").saveAsTable("t1") } - assert(e2.message.contains("Creating bucketed Hive serde table is not supported")) + assert(e2.message.contains("Creating bucketed Hive serde table is not supported yet")) checkError( exception = intercept[AnalysisException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index 5a21dcc2aaf1..aac601043f33 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -170,7 +170,7 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte val v2 = "CREATE TABLE t (c1 int, c2 int) USING hive CLUSTERED BY (c2) INTO 4 BUCKETS" val e2 = intercept[AnalysisException](analyzeCreateTable(v2)) - assert(e2.message.contains("Creating bucketed Hive serde table is not supported")) + assert(e2.message.contains("Creating bucketed Hive serde table is not supported yet")) val v3 = """ From 9715ccb08a9c9ae2d05cbc0dcd4157e5836e0db5 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 19:25:54 -0800 Subject: [PATCH 18/37] Fix build --- .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 8 ++++---- .../parser/SubstituteParmsAstBuilder.scala | 4 ++-- .../spark/sql/catalyst/parser/AstBuilder.scala | 
17 ++++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 99c8b0328431..960b1867632d 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1627,10 +1627,10 @@ strictIdentifier // simpleStrictIdentifier: like strictIdentifier but without IDENTIFIER('literal') support simpleStrictIdentifier - : IDENTIFIER #unquotedIdentifier - | quotedIdentifier #quotedIdentifierAlternative - | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier - | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier + : IDENTIFIER #simpleUnquotedIdentifier + | quotedIdentifier #simpleQuotedIdentifierAlternative + | {SQL_standard_keyword_behavior}? ansiNonReserved #simpleUnquotedIdentifier + | {!SQL_standard_keyword_behavior}? nonReserved #simpleUnquotedIdentifier ; quotedIdentifier diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala index cf0217ac840a..f32c1d6f3836 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParmsAstBuilder.scala @@ -82,7 +82,7 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { override def visitNamedParameterLiteral(ctx: NamedParameterLiteralContext): AnyRef = withOrigin(ctx) { // Named parameters use simpleIdentifier, so .getText() is correct. 
- val paramName = ctx.namedParameterMarker().identifier().getText + val paramName = ctx.namedParameterMarker().simpleIdentifier().getText namedParams += paramName // Calculate the location of the entire parameter (including the colon) @@ -119,7 +119,7 @@ class SubstituteParmsAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { override def visitNamedParameterMarkerRule(ctx: NamedParameterMarkerRuleContext): AnyRef = withOrigin(ctx) { // Named parameters use simpleIdentifier, so .getText() is correct. - val paramName = ctx.namedParameterMarker().identifier().getText + val paramName = ctx.namedParameterMarker().simpleIdentifier().getText namedParams += paramName // Calculate the location of the entire parameter (including the colon) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 47c8a352e17a..7c857816a9a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2085,7 +2085,8 @@ class AstBuilder extends DataTypeAstBuilder (Seq[NamedExpression], Option[String]) = withOrigin(ctx) { val exprs = ctx.unpivotColumns.asScala.map(visitUnpivotColumn).toSeq - val alias = Option(ctx.unpivotAlias()).map(a => getIdentifierText(a.errorCapturingIdentifier())) + val alias = + Option(ctx.unpivotAlias()).map(a => getIdentifierText(a.errorCapturingIdentifier())) (exprs, alias) } @@ -6351,12 +6352,18 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) { // Function scope uses simpleIdentifier, so .getText() is correct. 
- val (userScope, systemScope) = Option(ctx.identifier) - .map(_.getText.toLowerCase(Locale.ROOT)) match { + val scope = Option(ctx.functionScope) + val (userScope, systemScope) = scope.map(_.getText.toLowerCase(Locale.ROOT)) match { case None | Some("all") => (true, true) case Some("system") => (false, true) case Some("user") => (true, false) - case Some(x) => throw QueryParsingErrors.showFunctionsUnsupportedError(x, ctx.identifier()) + case Some(x) => + // Cast to IdentifierContext for backward compatibility with error signature. + // The error method signature expects IdentifierContext but we use simpleIdentifier + // in the grammar now. Since both inherit from ParserRuleContext and the error + // method only uses it for location info, this cast is safe. + throw QueryParsingErrors.showFunctionsUnsupportedError( + x, ctx.functionScope.asInstanceOf[IdentifierContext]) } val legacy = Option(ctx.legacy).map(visitMultipartIdentifier) @@ -6524,7 +6531,7 @@ class AstBuilder extends DataTypeAstBuilder override def visitNamedParameterLiteral( ctx: NamedParameterLiteralContext): Expression = withOrigin(ctx) { // Named parameters use simpleIdentifier, so .getText() is correct. 
- NamedParameter(ctx.namedParameterMarker().identifier().getText) + NamedParameter(ctx.namedParameterMarker().simpleIdentifier().getText) } /** From 774eb2bcf3e2e3c7ebb63394d87e118684ec5c6e Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 20:55:52 -0800 Subject: [PATCH 19/37] Address comment by Wenchen --- .../catalyst/parser/DataTypeAstBuilder.scala | 35 ++++++++++++++----- .../sql/catalyst/parser/AstBuilder.scala | 28 +++------------ 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 04434b91be82..76434ee2bcb8 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -200,6 +200,25 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE ctx.getText.toInt } + /** + * Parse a string into a multi-part identifier. This method is intended to be overridden by + * subclasses that have access to a full SQL parser. The base implementation simply returns the + * input as a single-part identifier. + * + * For example, in AstBuilder, this would parse "`catalog`.`schema`.`table`" into Seq("catalog", + * "schema", "table"). + * + * @param identifier + * The identifier string to parse, potentially containing dots and backticks. + * @return + * Sequence of identifier parts. + */ + protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + // Base implementation: just return the string as a single part. + // Subclasses with access to a full parser should override this. + Seq(identifier) + } + /** * Get the identifier parts from a context, handling both regular identifiers and * IDENTIFIER('literal'). 
This method is used to support identifier-lite syntax where @@ -218,23 +237,21 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE case idLitCtx: IdentifierLiteralContext => // For IDENTIFIER('literal') in strictIdentifier. val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // This base implementation just returns the literal as a single part. - // Subclasses should override to parse qualified identifiers. - Seq(literalValue) + // Parse the string to handle qualified identifiers like "`cat`.`schema`". + parseMultipartIdentifier(literalValue) case idLitCtx: IdentifierLiteralWithExtraContext => // For IDENTIFIER('literal') in errorCapturingIdentifier. val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // This base implementation just returns the literal as a single part. - // Subclasses should override to parse qualified identifiers. - Seq(literalValue) + // Parse the string to handle qualified identifiers like "`cat`.`schema`". + parseMultipartIdentifier(literalValue) case base: ErrorCapturingIdentifierBaseContext => // Regular identifier with errorCapturingIdentifierExtra. // Need to recursively handle identifier which might itself be IDENTIFIER('literal'). - Option(base.identifier()).flatMap(id => - Option(id.strictIdentifier()).map(getIdentifierParts) - ).getOrElse(Seq(ctx.getText)) + Option(base.identifier()) + .flatMap(id => Option(id.strictIdentifier()).map(getIdentifierParts)) + .getOrElse(Seq(ctx.getText)) case _ => // For regular identifiers, just return the text as a single part. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7c857816a9a5..58b4872ab4ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -119,30 +119,12 @@ class AstBuilder extends DataTypeAstBuilder } /** - * Override the base getIdentifierParts to properly parse qualified identifiers in - * IDENTIFIER('literal') contexts. Uses CatalystSqlParser to handle qualified identifiers - * like IDENTIFIER('`catalog`.`schema`') which should be parsed as Seq("catalog", "schema"). + * Override to provide actual multi-part identifier parsing using CatalystSqlParser. This allows + * the base class to handle IDENTIFIER('qualified.identifier') without needing special case + * logic in getIdentifierParts. */ - override protected def getIdentifierParts(ctx: ParserRuleContext): Seq[String] = { - ctx match { - case idLitCtx: IdentifierLiteralContext => - // For IDENTIFIER('literal') in strictIdentifier. - val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string as a multi-part identifier. - // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) - CatalystSqlParser.parseMultipartIdentifier(literalValue) - - case idLitCtx: IdentifierLiteralWithExtraContext => - // For IDENTIFIER('literal') in errorCapturingIdentifier. - val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string as a multi-part identifier. - // (e.g., "`cat`.`schema`" -> Seq("cat", "schema")) - CatalystSqlParser.parseMultipartIdentifier(literalValue) - - case _ => - // Delegate all other cases to the base implementation. 
- super.getIdentifierParts(ctx) - } + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + CatalystSqlParser.parseMultipartIdentifier(identifier) } /** From cdafbc6294ae6bba8bfddc2d82de5a4c23e476df Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 6 Nov 2025 22:30:09 -0800 Subject: [PATCH 20/37] Add more tests --- .../spark/sql/errors/QueryParsingErrors.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 7 +- .../identifier-clause-legacy.sql.out | 210 +++++++++++++++ .../identifier-clause.sql.out | 196 ++++++++++++++ .../sql-tests/inputs/identifier-clause.sql | 36 +++ .../results/identifier-clause-legacy.sql.out | 239 ++++++++++++++++++ .../results/identifier-clause.sql.out | 213 ++++++++++++++++ 7 files changed, 896 insertions(+), 7 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 630f274a621e..553161ea2db0 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -477,7 +477,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { ctx) } - def showFunctionsUnsupportedError(identifier: String, ctx: IdentifierContext): Throwable = { + def showFunctionsUnsupportedError(identifier: String, ctx: ParserRuleContext): Throwable = { new ParseException( errorClass = "INVALID_SQL_SYNTAX.SHOW_FUNCTIONS_INVALID_SCOPE", messageParameters = Map("scope" -> toSQLId(identifier)), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 58b4872ab4ab..685906cabc70 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ 
-6340,12 +6340,7 @@ class AstBuilder extends DataTypeAstBuilder case Some("system") => (false, true) case Some("user") => (true, false) case Some(x) => - // Cast to IdentifierContext for backward compatibility with error signature. - // The error method signature expects IdentifierContext but we use simpleIdentifier - // in the grammar now. Since both inherit from ParserRuleContext and the error - // method only uses it for location info, this cast is safe. - throw QueryParsingErrors.showFunctionsUnsupportedError( - x, ctx.functionScope.asInstanceOf[IdentifierContext]) + throw QueryParsingErrors.showFunctionsUnsupportedError(x, ctx.functionScope) } val legacy = Option(ctx.legacy).map(visitMultipartIdentifier) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out index 3437054a1fbf..17fcc9b47729 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause-legacy.sql.out @@ -2214,6 +2214,216 @@ DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test2 +-- !query +CREATE TABLE lateral_test(arr ARRAY) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`lateral_test`, false + + +-- !query +INSERT INTO lateral_test VALUES (array(1, 2, 3)) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`lateral_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in 
comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test), [arr] ++- Project [col1#x AS arr#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW explode(arr) IDENTIFIER('tbl') AS IDENTIFIER('col') ORDER BY ALL +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW OUTER explode(arr) IDENTIFIER('my_table') AS IDENTIFIER('my_col') ORDER BY ALL +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE lateral_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.lateral_test + + +-- !query +CREATE TABLE unpivot_test(id INT, a INT, b INT, c INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`unpivot_test`, false + + +-- !query +INSERT INTO unpivot_test VALUES (1, 10, 20, 30) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`unpivot_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test), [id, a, b, c] ++- Project [col1#x AS id#x, col2#x AS a#x, col3#x AS b#x, col4#x AS c#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x] + + +-- !query +SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b AS 
IDENTIFIER('col_b'))) ORDER BY ALL +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE unpivot_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test + + +-- !query +SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''param1''", + "hint" : "" + } +} + + +-- !query +CREATE TABLE hint_test(c1 INT, c2 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`hint_test`, false + + +-- !query +INSERT INTO hint_test VALUES (1, 2), (3, 4) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`hint_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test), [c1, c2] ++- Project [col1#x AS c1#x, col2#x AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT /*+ IDENTIFIER('BROADCAST')(hint_test) */ * FROM hint_test +-- !query analysis 
+org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT /*+ IDENTIFIER('MERGE')(hint_test) */ * FROM hint_test +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE hint_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.hint_test + + +-- !query +SHOW IDENTIFIER('USER') FUNCTIONS +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT EXTRACT(IDENTIFIER('YEAR') FROM DATE'2024-01-15') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`TIMESTAMPADD`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 60, + "fragment" : "TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15')" + } ] +} + + -- !query DROP SCHEMA identifier_clause_test_schema -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 
dbe8c6269740..63d6f01e2926 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1974,6 +1974,202 @@ DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.integration_test2 +-- !query +CREATE TABLE lateral_test(arr ARRAY) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`lateral_test`, false + + +-- !query +INSERT INTO lateral_test VALUES (array(1, 2, 3)) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`lateral_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/lateral_test), [arr] ++- Project [col1#x AS arr#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW explode(arr) IDENTIFIER('tbl') AS IDENTIFIER('col') ORDER BY ALL +-- !query analysis +Sort [arr#x ASC NULLS FIRST, col#x ASC NULLS FIRST], true ++- Project [arr#x, col#x] + +- Generate explode(arr#x), false, tbl, [col#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.lateral_test + +- Relation spark_catalog.identifier_clause_test_schema.lateral_test[arr#x] parquet + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW OUTER explode(arr) IDENTIFIER('my_table') AS IDENTIFIER('my_col') ORDER BY ALL +-- !query analysis +Sort [arr#x ASC NULLS FIRST, my_col#x ASC NULLS FIRST], true ++- Project [arr#x, my_col#x] + +- Generate explode(arr#x), true, my_table, [my_col#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.lateral_test + +- 
Relation spark_catalog.identifier_clause_test_schema.lateral_test[arr#x] parquet + + +-- !query +DROP TABLE lateral_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.lateral_test + + +-- !query +CREATE TABLE unpivot_test(id INT, a INT, b INT, c INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`unpivot_test`, false + + +-- !query +INSERT INTO unpivot_test VALUES (1, 10, 20, 30) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`unpivot_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/unpivot_test), [id, a, b, c] ++- Project [col1#x AS id#x, col2#x AS a#x, col3#x AS b#x, col4#x AS c#x] + +- LocalRelation [col1#x, col2#x, col3#x, col4#x] + + +-- !query +SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b'))) ORDER BY ALL +-- !query analysis +Sort [id#x ASC NULLS FIRST, c#x ASC NULLS FIRST, col#x ASC NULLS FIRST, val#x ASC NULLS FIRST], true ++- Project [id#x, c#x, col#x, val#x] + +- Filter isnotnull(coalesce(val#x)) + +- Expand [[id#x, c#x, col_a, a#x], [id#x, c#x, col_b, b#x]], [id#x, c#x, col#x, val#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.unpivot_test + +- Relation spark_catalog.identifier_clause_test_schema.unpivot_test[id#x,a#x,b#x,c#x] csv + + +-- !query +SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL +-- !query analysis +Sort [id#x ASC NULLS FIRST, col#x ASC NULLS FIRST, v1#x ASC NULLS 
FIRST, v2#x ASC NULLS FIRST], true ++- Project [id#x, col#x, v1#x, v2#x] + +- Filter isnotnull(coalesce(v1#x, v2#x)) + +- Expand [[id#x, cols_ab, a#x, b#x], [id#x, cols_bc, b#x, c#x]], [id#x, col#x, v1#x, v2#x] + +- SubqueryAlias spark_catalog.identifier_clause_test_schema.unpivot_test + +- Relation spark_catalog.identifier_clause_test_schema.unpivot_test[id#x,a#x,b#x,c#x] csv + + +-- !query +DROP TABLE unpivot_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.unpivot_test + + +-- !query +SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1) +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''param1''", + "hint" : "" + } +} + + +-- !query +CREATE TABLE hint_test(c1 INT, c2 INT) USING CSV +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`identifier_clause_test_schema`.`hint_test`, false + + +-- !query +INSERT INTO hint_test VALUES (1, 2), (3, 4) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test], Append, `spark_catalog`.`identifier_clause_test_schema`.`hint_test`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/identifier_clause_test_schema.db/hint_test), [c1, c2] ++- Project [col1#x AS c1#x, col2#x AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT /*+ IDENTIFIER('BROADCAST')(hint_test) */ * FROM hint_test +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT /*+ 
IDENTIFIER('MERGE')(hint_test) */ * FROM hint_test +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE hint_test +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), identifier_clause_test_schema.hint_test + + +-- !query +SHOW IDENTIFIER('USER') FUNCTIONS +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT EXTRACT(IDENTIFIER('YEAR') FROM DATE'2024-01-15') +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'FROM'", + "hint" : "" + } +} + + +-- !query +SELECT TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15') +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`TIMESTAMPADD`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 60, + "fragment" : "TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15')" + } ] +} + + -- !query DROP SCHEMA identifier_clause_test_schema -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index b2470b23f422..c90165d31e90 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -343,4 +343,40 @@ EXECUTE IMMEDIATE 'SELECT 1 AS 
IDENTIFIER(:schema ''.'' :col)' -- Cleanup DROP TABLE integration_test; DROP TABLE integration_test2; + +-- LATERAL VIEW with IDENTIFIER() for table and column names +CREATE TABLE lateral_test(arr ARRAY) USING PARQUET; +INSERT INTO lateral_test VALUES (array(1, 2, 3)); +SELECT * FROM lateral_test LATERAL VIEW explode(arr) IDENTIFIER('tbl') AS IDENTIFIER('col') ORDER BY ALL; +SELECT * FROM lateral_test LATERAL VIEW OUTER explode(arr) IDENTIFIER('my_table') AS IDENTIFIER('my_col') ORDER BY ALL; +DROP TABLE lateral_test; + +-- UNPIVOT with IDENTIFIER() for value column alias +CREATE TABLE unpivot_test(id INT, a INT, b INT, c INT) USING CSV; +INSERT INTO unpivot_test VALUES (1, 10, 20, 30); +SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b'))) ORDER BY ALL; +SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL; +DROP TABLE unpivot_test; + +-- All the following tests fail because they are not about "true" identifiers + +-- This should fail - named parameters don't support IDENTIFIER() +SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1); + +-- Hint names use simpleIdentifier - these should fail +CREATE TABLE hint_test(c1 INT, c2 INT) USING CSV; +INSERT INTO hint_test VALUES (1, 2), (3, 4); +SELECT /*+ IDENTIFIER('BROADCAST')(hint_test) */ * FROM hint_test; +SELECT /*+ IDENTIFIER('MERGE')(hint_test) */ * FROM hint_test; +DROP TABLE hint_test; + +-- These should fail - function scope doesn't support IDENTIFIER() +SHOW IDENTIFIER('USER') FUNCTIONS; + +-- EXTRACT field name uses simpleIdentifier - should fail +SELECT EXTRACT(IDENTIFIER('YEAR') FROM DATE'2024-01-15'); + +-- TIMESTAMPADD unit is a token, not identifier - should fail +SELECT TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15'); + DROP SCHEMA identifier_clause_test_schema; diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out 
b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out index e5ec83099ffd..ad351074692f 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause-legacy.sql.out @@ -2489,6 +2489,245 @@ struct<> +-- !query +CREATE TABLE lateral_test(arr ARRAY) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO lateral_test VALUES (array(1, 2, 3)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW explode(arr) IDENTIFIER('tbl') AS IDENTIFIER('col') ORDER BY ALL +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW OUTER explode(arr) IDENTIFIER('my_table') AS IDENTIFIER('my_col') ORDER BY ALL +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE lateral_test +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE unpivot_test(id INT, a INT, b INT, c INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO unpivot_test VALUES (1, 10, 20, 30) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b'))) ORDER BY ALL +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT * FROM 
unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE unpivot_test +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''param1''", + "hint" : "" + } +} + + +-- !query +CREATE TABLE hint_test(c1 INT, c2 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO hint_test VALUES (1, 2), (3, 4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT /*+ IDENTIFIER('BROADCAST')(hint_test) */ * FROM hint_test +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT /*+ IDENTIFIER('MERGE')(hint_test) */ * FROM hint_test +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +DROP TABLE hint_test +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW IDENTIFIER('USER') FUNCTIONS +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT 
EXTRACT(IDENTIFIER('YEAR') FROM DATE'2024-01-15') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`TIMESTAMPADD`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 60, + "fragment" : "TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15')" + } ] +} + + -- !query DROP SCHEMA identifier_clause_test_schema -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index e9fe5fe37c96..75fa8735a7e9 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -2171,6 +2171,219 @@ struct<> +-- !query +CREATE TABLE lateral_test(arr ARRAY) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO lateral_test VALUES (array(1, 2, 3)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW explode(arr) IDENTIFIER('tbl') AS IDENTIFIER('col') ORDER BY ALL +-- !query schema +struct,col:int> +-- !query output +[1,2,3] 1 +[1,2,3] 2 +[1,2,3] 3 + + +-- !query +SELECT * FROM lateral_test LATERAL VIEW OUTER explode(arr) IDENTIFIER('my_table') AS IDENTIFIER('my_col') ORDER BY ALL +-- !query schema +struct,my_col:int> +-- !query output +[1,2,3] 1 +[1,2,3] 2 +[1,2,3] 3 + + +-- 
!query +DROP TABLE lateral_test +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE unpivot_test(id INT, a INT, b INT, c INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO unpivot_test VALUES (1, 10, 20, 30) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b AS IDENTIFIER('col_b'))) ORDER BY ALL +-- !query schema +struct +-- !query output +1 30 col_a 10 +1 30 col_b 20 + + +-- !query +SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL +-- !query schema +struct +-- !query output +1 cols_ab 10 20 +1 cols_bc 20 30 + + +-- !query +DROP TABLE unpivot_test +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "''param1''", + "hint" : "" + } +} + + +-- !query +CREATE TABLE hint_test(c1 INT, c2 INT) USING CSV +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO hint_test VALUES (1, 2), (3, 4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT /*+ IDENTIFIER('BROADCAST')(hint_test) */ * FROM hint_test +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT /*+ IDENTIFIER('MERGE')(hint_test) */ * FROM hint_test +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + 
+ +-- !query +DROP TABLE hint_test +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW IDENTIFIER('USER') FUNCTIONS +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'('", + "hint" : "" + } +} + + +-- !query +SELECT EXTRACT(IDENTIFIER('YEAR') FROM DATE'2024-01-15') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'FROM'", + "hint" : "" + } +} + + +-- !query +SELECT TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`TIMESTAMPADD`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`identifier_clause_test_schema`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 60, + "fragment" : "TIMESTAMPADD(IDENTIFIER('YEAR'), 1, DATE'2024-01-15')" + } ] +} + + -- !query DROP SCHEMA identifier_clause_test_schema -- !query schema From af00d2d8d74c0959cd9cf13cdb76f6aa8f6618fc Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Fri, 7 Nov 2025 09:46:58 -0800 Subject: [PATCH 21/37] Fix parameter coalescing and internal error on unbound parameters --- .../parser/SubstituteParamsParser.scala | 17 +++-- .../spark/sql/classic/SparkSession.scala | 24 ++++--- .../apache/spark/sql/ParametersSuite.scala | 72 +++++++++++++++++++ .../sql/StringLiteralCoalescingSuite.scala | 37 ++++++++++ 4 files changed, 137 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala index 54c8c2ec089f..a615d012811a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala @@ -186,6 +186,8 @@ class SubstituteParamsParser extends Logging { /** * Apply a list of substitutions to the SQL text. + * Inserts a space separator when a parameter is immediately preceded by a quote + * to avoid back-to-back quotes after substitution. */ private def applySubstitutions(sqlText: String, substitutions: List[Substitution]): String = { // Sort substitutions by start position in reverse order to avoid offset issues @@ -193,9 +195,17 @@ class SubstituteParamsParser extends Logging { var result = sqlText sortedSubstitutions.foreach { substitution => - result = result.substring(0, substitution.start) + - substitution.replacement + - result.substring(substitution.end) + val prefix = result.substring(0, substitution.start) + val replacement = substitution.replacement + val suffix = result.substring(substitution.end) + + // Check if replacement is immediately preceded by a quote and doesn't already + // start with whitespace + val needsSpace = substitution.start > 0 && + (result(substitution.start - 1) == '\'' || result(substitution.start - 1) == '"') && + replacement.nonEmpty && !replacement(0).isWhitespace + + result = prefix + (if (needsSpace) " " else "") + replacement + suffix } result } @@ -211,4 +221,3 @@ object SubstituteParamsParser { positionalParams: List[String] = List.empty): (String, Int, PositionMapper) = instance.substitute(sqlText, namedParams, positionalParams) } - diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala index f7876d9a023b..706a1bfb9ebe 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala @@ -501,14 +501,21 @@ class SparkSession private( private[sql] def sql(sqlText: String, args: Array[_], tracker: QueryPlanningTracker): DataFrame = withActive { val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { - val parsedPlan = if (args.nonEmpty) { - // Resolve and validate parameters first - val paramMap = args.zipWithIndex.map { case (arg, idx) => - s"_pos_$idx" -> lit(arg).expr - }.toMap - val resolvedParams = resolveAndValidateParameters(paramMap) + val parsedPlan = { + // Always parse with parameter context to detect unbound parameter markers. + // Even if args is empty, we need to detect and reject parameter markers in the SQL. + val (paramMap, resolvedParams) = if (args.nonEmpty) { + val pMap = args.zipWithIndex.map { case (arg, idx) => + s"_pos_$idx" -> lit(arg).expr + }.toMap + (pMap, resolveAndValidateParameters(pMap)) + } else { + (Map.empty[String, Expression], Map.empty[String, Expression]) + } + val paramContext = PositionalParameterContext(resolvedParams.values.toSeq) val parsed = sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) + // Check for SQL scripting with positional parameters if (parsed.isInstanceOf[CompoundBody]) { throw SqlScriptingErrors.positionalParametersAreNotSupportedWithSqlScripting() @@ -519,8 +526,6 @@ class SparkSession private( } else { parsed } - } else { - sessionState.sqlParser.parsePlan(sqlText) } parsedPlan } @@ -574,7 +579,8 @@ class SparkSession private( } else { // No parameters - parse normally without parameter context val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { - sessionState.sqlParser.parsePlan(sqlText) + val paramContext = HybridParameterContext(Seq.empty, Seq.empty) + sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) } Dataset.ofRows(self, plan, tracker) } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala index e30b48fdb176..53e661f83d4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala @@ -2374,4 +2374,76 @@ class ParametersSuite extends QueryTest with SharedSparkSession { expectedStopPos = Some(46) // End of "nonexistent_table" in inner query ) } + + test("detect unbound named parameter with empty map") { + // When sql() is called with empty map, parameter markers should still be detected + val exception = intercept[AnalysisException] { + spark.sql("SELECT :param", Map.empty[String, Any]) + } + checkError( + exception = exception, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "param"), + context = ExpectedContext( + fragment = ":param", + start = 7, + stop = 12)) + } + + test("detect unbound positional parameter with empty array") { + // When sql() is called with empty array, parameter markers should still be detected + val exception = intercept[AnalysisException] { + spark.sql("SELECT ?", Array.empty[Any]) + } + checkError( + exception = exception, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "_7"), + context = ExpectedContext( + fragment = "?", + start = 7, + stop = 7)) + } + + test("detect unbound named parameter with no arguments") { + val exception = intercept[AnalysisException] { + spark.sql("SELECT :param") + } + checkError( + exception = exception, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "param"), + context = ExpectedContext( + fragment = ":param", + start = 7, + stop = 12)) + } + + test("detect unbound positional parameter with nop arguments") { + val exception = intercept[AnalysisException] { + spark.sql("SELECT ?") + } + checkError( + exception = exception, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "_7"), + context = 
ExpectedContext( + fragment = "?", + start = 7, + stop = 7)) + } + + test("empty map with no parameters - should succeed") { + // When there are no parameter markers, empty map should work fine + checkAnswer( + spark.sql("SELECT 1", Map.empty[String, Any]), + Row(1)) + } + + test("empty array with no parameters - should succeed") { + // When there are no parameter markers, empty array should work fine + checkAnswer( + spark.sql("SELECT 1", Array.empty[Any]), + Row(1)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringLiteralCoalescingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringLiteralCoalescingSuite.scala index ea305f219aa5..e98cfc8bebcc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringLiteralCoalescingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringLiteralCoalescingSuite.scala @@ -867,4 +867,41 @@ class StringLiteralCoalescingSuite extends QueryTest with SharedSparkSession { Row("1a2b3c4") ) } + + test("parameter substitution with quote spacing - legacy consecutive string literals disabled") { + // With LEGACY_CONSECUTIVE_STRING_LITERALS enabled, '' would normally produce a single quote + // But with parameter substitution, 'literal':param should insert a space to prevent + // the closing quote and opening quote from being interpreted as an escape sequence + withSQLConf("spark.sql.legacy.consecutiveStringLiterals.enabled" -> "false") { + checkAnswer( + spark.sql("SELECT 'hello':p, 'hello''world'", Map("p" -> "world")), + // Space parameter separates literals, no singleton quote in middle + Row("helloworld", "hello'world") + ) + } + } + + test("parameter substitution with quote spacing - legacy consecutive string literals enabled") { + withSQLConf("spark.sql.legacy.consecutiveStringLiterals.enabled" -> "true") { + checkAnswer( + spark.sql("SELECT 'hello':p, 'hello''world'", Map("p" -> "world")), + // Space parameter separates literals, no singleton quote in middle + Row("helloworld", 
"helloworld") + ) + } + } + + // ======================================================================== + // Legacy Mode Tests - JSON Path Expressions vs Parameter Substitution + // ======================================================================== + test("JSON path expression - new mode with parameter substitution") { + // In new mode (constantsOnly=false), :name is a parameter marker when args provided + withSQLConf("spark.sql.legacy.parameterSubstitution.constantsOnly" -> "false") { + // The :name gets substituted with the parameter value + checkAnswer( + spark.sql("SELECT '{\"name\":\"joe\"}' :name", Map("name" -> "replaced")), + Row("{\"name\":\"joe\"}replaced") + ) + } + } } From 55a245bbbf3542f6f91c5afe3d0aaee365394ff4 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Fri, 7 Nov 2025 12:37:21 -0800 Subject: [PATCH 22/37] more fixes --- .../spark/sql/classic/SparkSession.scala | 62 ++++++++++--------- .../analyzer-results/bitwise.sql.out | 45 ++++++++++++-- .../sql-tests/inputs/execute-immediate.sql | 8 +++ .../sql-tests/results/bitwise.sql.out | 45 ++++++++++++-- 4 files changed, 120 insertions(+), 40 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala index 706a1bfb9ebe..84838e6e70d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala @@ -45,11 +45,11 @@ import org.apache.spark.sql.catalyst.analysis.{GeneralParameterizedQuery, NamePa import org.apache.spark.sql.catalyst.encoders._ import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, Literal} import org.apache.spark.sql.catalyst.parser.{HybridParameterContext, NamedParameterContext, ParserInterface, PositionalParameterContext} -import org.apache.spark.sql.catalyst.plans.logical.{CompoundBody, LocalRelation, OneRowRelation, Project, Range} 
+import org.apache.spark.sql.catalyst.plans.logical.{CompoundBody, LocalRelation, LogicalPlan, OneRowRelation, Project, Range} import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.classic.SparkSession.applyAndLoadExtensions -import org.apache.spark.sql.errors.SqlScriptingErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, SqlScriptingErrors} import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.ExternalCommandExecutor import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -559,31 +559,28 @@ class SparkSession private( args: Map[String, Any], tracker: QueryPlanningTracker): DataFrame = withActive { - // Always set parameter context if we have actual parameters - if (args.nonEmpty) { - // Resolve and validate parameters first - val resolvedParams = resolveAndValidateParameters(args.transform((_, v) => lit(v).expr)) - val paramContext = NamedParameterContext(resolvedParams) - val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { - val parsedPlan = sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) - // In legacy mode, wrap the parsed plan with NameParameterizedQuery - // so that the BindParameters analyzer rule can bind the parameters - if (sessionState.conf.legacyParameterSubstitutionConstantsOnly) { - NameParameterizedQuery(parsedPlan, paramContext.params) - } else { - parsedPlan - } - } - - Dataset.ofRows(self, plan, tracker) + // Always parse with parameter context to detect unbound parameter markers. + // Even if args is empty, we need to detect and reject parameter markers in the SQL. 
+ val resolvedParams = if (args.nonEmpty) { + resolveAndValidateParameters(args.transform((_, v) => lit(v).expr)) } else { - // No parameters - parse normally without parameter context - val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { - val paramContext = HybridParameterContext(Seq.empty, Seq.empty) - sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) + Map.empty[String, Expression] + } + val paramContext = NamedParameterContext(resolvedParams) + val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { + val parsedPlan = sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) + val queryPlan = parsedPlan match { + case compoundBody: CompoundBody => compoundBody + case logicalPlan: LogicalPlan => + if (args.nonEmpty) { + NameParameterizedQuery(logicalPlan, paramContext.params) + } else { + logicalPlan + } } - Dataset.ofRows(self, plan, tracker) + queryPlan } + Dataset.ofRows(self, plan, tracker) } /** @inheritdoc */ @@ -616,6 +613,8 @@ class SparkSession private( tracker: QueryPlanningTracker): DataFrame = withActive { val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { + // Always parse with parameter context to detect unbound parameter markers. + // Even if args is empty, we need to detect and reject parameter markers in the SQL. 
val parsedPlan = if (args.nonEmpty) { // Resolve and validate parameter arguments val paramMap = args.zipWithIndex.map { case (arg, idx) => @@ -649,11 +648,6 @@ class SparkSession private( val parsed = sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) - // Check for SQL scripting with positional parameters - if (parsed.isInstanceOf[CompoundBody] && paramNames.isEmpty) { - throw SqlScriptingErrors.positionalParametersAreNotSupportedWithSqlScripting() - } - // In legacy mode, wrap with GeneralParameterizedQuery for analyzer binding if (sessionState.conf.legacyParameterSubstitutionConstantsOnly) { GeneralParameterizedQuery( @@ -665,8 +659,16 @@ class SparkSession private( parsed } } else { - sessionState.sqlParser.parsePlan(sqlText) + // No arguments provided, but still need to detect parameter markers + val paramContext = HybridParameterContext(Seq.empty, Seq.empty) + sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) } + + // Check for SQL scripts in EXECUTE IMMEDIATE (applies to both empty and non-empty args) + if (parsedPlan.isInstanceOf[CompoundBody]) { + throw QueryCompilationErrors.sqlScriptInExecuteImmediate(sqlText) + } + parsedPlan } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out index 35033e4a2d96..d7130b48a312 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out @@ -339,7 +339,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 > > 2" + } ] } @@ -353,7 +360,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - } + }, + "queryContext" : [ { + 
"objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 < < 2" + } ] } @@ -367,7 +381,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 22, + "fragment" : "SELECT 20181117 > >> 2" + } ] } @@ -381,7 +402,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 <<< 2" + } ] } @@ -395,7 +423,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 22, + "fragment" : "SELECT 20181117 >>>> 2" + } ] } diff --git a/sql/core/src/test/resources/sql-tests/inputs/execute-immediate.sql b/sql/core/src/test/resources/sql-tests/inputs/execute-immediate.sql index 17fa47be4eec..16e1850d5e59 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/execute-immediate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/execute-immediate.sql @@ -289,3 +289,11 @@ EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP('key1', 'valu -- !query EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP(1, 'one', 2, 'two') AS p; +-- !query +-- Test unbound parameter markers without USING clause +-- named parameter without USING clause should fail +EXECUTE IMMEDIATE 'SELECT :param'; + +-- !query +-- positional parameter without USING clause should fail +EXECUTE IMMEDIATE 'SELECT ?'; diff --git a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out index 7233b0d0ae49..58ceceaad188 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out @@ -360,7 +360,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 > > 2" + } ] } @@ -376,7 +383,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 < < 2" + } ] } @@ -392,7 +406,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 22, + "fragment" : "SELECT 20181117 > >> 2" + } ] } @@ -408,7 +429,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 21, + "fragment" : "SELECT 20181117 <<< 2" + } ] } @@ -424,7 +452,14 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - } + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 22, + "fragment" : "SELECT 20181117 >>>> 2" + } ] } From f6a62149ab536c326c45934f2fb7d3e3d003f8d2 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Fri, 7 Nov 2025 17:03:43 -0800 Subject: [PATCH 23/37] Regen execute-immediate and suppress unnecessary error context --- .../spark/sql/execution/SparkSqlParser.scala | 12 ++++- .../execute-immediate.sql.out | 40 +++++++++++++++++ .../results/execute-immediate.sql.out | 44 +++++++++++++++++++ 3 files changed, 94 insertions(+), 2 
deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ef00f30fbdda..550c23e3e830 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -118,14 +118,22 @@ class SparkSqlParser extends AbstractSqlParser { // Step 2: Apply parameter substitution if a parameter context is provided. val (paramSubstituted, positionMapper, hasParameters) = parameterContext match { case Some(context) => + // Check if the context actually contains parameters + val contextHasParams = context match { + case NamedParameterContext(params) => params.nonEmpty + case PositionalParameterContext(params) => params.nonEmpty + case HybridParameterContext(args, _) => args.nonEmpty + } if (SQLConf.get.legacyParameterSubstitutionConstantsOnly) { // Legacy mode: Parameters are detected but substitution is deferred to analysis phase. - (variableSubstituted, PositionMapper.identity(variableSubstituted), true) + // Only set hasParameters if the context actually contains parameters. + (variableSubstituted, PositionMapper.identity(variableSubstituted), contextHasParams) } else { // Modern mode: Perform parameter substitution during parsing. val (substituted, mapper) = ParameterHandler.substituteParameters(variableSubstituted, context) - (substituted, mapper, true) + // Only set hasParameters if the context actually contains parameters. + (substituted, mapper, contextHasParams) } case None => // No parameter context provided; skip parameter substitution. 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out index 1271f730d1e5..c874945badb1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out @@ -1224,3 +1224,43 @@ EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP(1, 'one', 2, -- !query analysis Project [typeof(map(1, one, 2, two)) AS type#x, map(1, one, 2, two) AS val#x] +- OneRowRelation + + +-- !query +EXECUTE IMMEDIATE 'SELECT :param' +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "param" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : ":param" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT ?' +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "_7" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "?" 
+ } ] +} diff --git a/sql/core/src/test/resources/sql-tests/results/execute-immediate.sql.out b/sql/core/src/test/resources/sql-tests/results/execute-immediate.sql.out index 06adf4435046..dd1207b4f2be 100644 --- a/sql/core/src/test/resources/sql-tests/results/execute-immediate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/execute-immediate.sql.out @@ -1211,3 +1211,47 @@ EXECUTE IMMEDIATE 'SELECT typeof(:p) as type, :p as val' USING MAP(1, 'one', 2, struct> -- !query output map {1:"one",2:"two"} + + +-- !query +EXECUTE IMMEDIATE 'SELECT :param' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "param" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : ":param" + } ] +} + + +-- !query +EXECUTE IMMEDIATE 'SELECT ?' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNBOUND_SQL_PARAMETER", + "sqlState" : "42P02", + "messageParameters" : { + "name" : "_7" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 8, + "fragment" : "?" 
+ } ] +} From 6aad3dd642dc87a720e6ffcec6fa81213d6ebeaf Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Fri, 7 Nov 2025 20:15:20 -0800 Subject: [PATCH 24/37] Undo mistaken testacses updates --- .../analyzer-results/bitwise.sql.out | 45 +++---------------- .../sql-tests/results/bitwise.sql.out | 45 +++---------------- 2 files changed, 10 insertions(+), 80 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out index d7130b48a312..35033e4a2d96 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out @@ -339,14 +339,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 > > 2" - } ] + } } @@ -360,14 +353,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 < < 2" - } ] + } } @@ -381,14 +367,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>>'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 22, - "fragment" : "SELECT 20181117 > >> 2" - } ] + } } @@ -402,14 +381,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 <<< 2" - } ] + } } @@ -423,14 +395,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - 
}, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 22, - "fragment" : "SELECT 20181117 >>>> 2" - } ] + } } diff --git a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out index 58ceceaad188..7233b0d0ae49 100644 --- a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out @@ -360,14 +360,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 > > 2" - } ] + } } @@ -383,14 +376,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 < < 2" - } ] + } } @@ -406,14 +392,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>>'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 22, - "fragment" : "SELECT 20181117 > >> 2" - } ] + } } @@ -429,14 +408,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'<'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 21, - "fragment" : "SELECT 20181117 <<< 2" - } ] + } } @@ -452,14 +424,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "messageParameters" : { "error" : "'>'", "hint" : "" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 22, - "fragment" : "SELECT 20181117 >>>> 2" - } ] + } } From e963e97e61ed0ce058d4bfa7c72134af408e0653 Mon Sep 17 00:00:00 2001 From: Serge 
Rielau Date: Sat, 8 Nov 2025 07:43:36 -0800 Subject: [PATCH 25/37] fix explain testcases --- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 74cdee49e55a..7828f3c27e09 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4779,7 +4779,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // scalastyle:off """== Physical Plan == |Execute ExplainCommand - | +- ExplainCommand ExplainCommand 'Project [unresolvedalias(1)], SimpleMode, SimpleMode + | +- ExplainCommand ExplainCommand 'NameParameterizedQuery [first], [1], SimpleMode, SimpleMode |""" // scalastyle:on From db238de91a8f255da72e63672be4d1cb3ee1af68 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Sat, 8 Nov 2025 10:25:42 -0800 Subject: [PATCH 26/37] Refine explain fix --- .../scala/org/apache/spark/sql/classic/SparkSession.scala | 7 ++++--- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala index 84838e6e70d4..c47e84541621 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/SparkSession.scala @@ -517,11 +517,11 @@ class SparkSession private( val parsed = sessionState.sqlParser.parsePlanWithParameters(sqlText, paramContext) // Check for SQL scripting with positional parameters - if (parsed.isInstanceOf[CompoundBody]) { + if (parsed.isInstanceOf[CompoundBody] && args.nonEmpty) { throw SqlScriptingErrors.positionalParametersAreNotSupportedWithSqlScripting() } // In 
legacy mode, wrap with PosParameterizedQuery for analyzer binding - if (sessionState.conf.legacyParameterSubstitutionConstantsOnly) { + if (args.nonEmpty && sessionState.conf.legacyParameterSubstitutionConstantsOnly) { PosParameterizedQuery(parsed, paramContext.params) } else { parsed @@ -572,7 +572,8 @@ class SparkSession private( val queryPlan = parsedPlan match { case compoundBody: CompoundBody => compoundBody case logicalPlan: LogicalPlan => - if (args.nonEmpty) { + // In legacy mode, wrap with NameParameterizedQuery for analyzer binding + if (args.nonEmpty && sessionState.conf.legacyParameterSubstitutionConstantsOnly) { NameParameterizedQuery(logicalPlan, paramContext.params) } else { logicalPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 7828f3c27e09..74cdee49e55a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4779,7 +4779,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // scalastyle:off """== Physical Plan == |Execute ExplainCommand - | +- ExplainCommand ExplainCommand 'NameParameterizedQuery [first], [1], SimpleMode, SimpleMode + | +- ExplainCommand ExplainCommand 'Project [unresolvedalias(1)], SimpleMode, SimpleMode |""" // scalastyle:on From d0836e09ab2fca8eed6879fd59958771a0fb4330 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Mon, 10 Nov 2025 06:54:42 -0800 Subject: [PATCH 27/37] Mocve instructions around --- .../catalyst/parser/DataTypeAstBuilder.scala | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 76434ee2bcb8..d768bb155d2c 100644 --- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -35,32 +35,6 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, /** * AST builder for parsing data type definitions and table schemas. * - * ==CRITICAL: Extracting Identifier Names== - * - * When extracting identifier names from parser contexts, you MUST use the helper methods provided - * by this class instead of calling ctx.getText() directly: - * - * - '''getIdentifierText(ctx)''': For single identifiers (column names, aliases, window names) - * - '''getIdentifierParts(ctx)''': For qualified identifiers (table names, schema.table) - * - * '''DO NOT use ctx.getText() or ctx.identifier.getText()''' directly! These methods do not - * handle the IDENTIFIER('literal') syntax and will cause incorrect behavior. - * - * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse time - * (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), you'll - * get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature. - * - * Example: - * {{{ - * // WRONG - does not handle IDENTIFIER('literal'): - * val name = ctx.identifier.getText - * SubqueryAlias(ctx.name.getText, plan) - * - * // CORRECT - handles both regular identifiers and IDENTIFIER('literal'): - * val name = getIdentifierText(ctx.identifier) - * SubqueryAlias(getIdentifierText(ctx.name), plan) - * }}} - * * This is a client-side parser designed specifically for parsing data type strings (e.g., "INT", * "STRUCT") and table schemas. 
It assumes that the input does not contain * parameter markers (`:name` or `?`), as parameter substitution should occur before data types @@ -86,6 +60,32 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, * * @see * [[org.apache.spark.sql.catalyst.parser.AstBuilder]] for the full SQL statement parser + * + * ==CRITICAL: Extracting Identifier Names== + * + * When extracting identifier names from parser contexts, you MUST use the helper methods provided + * by this class instead of calling ctx.getText() directly: + * + * - '''getIdentifierText(ctx)''': For single identifiers (column names, aliases, window names) + * - '''getIdentifierParts(ctx)''': For qualified identifiers (table names, schema.table) + * + * '''DO NOT use ctx.getText() or ctx.identifier.getText()''' directly! These methods do not + * handle the IDENTIFIER('literal') syntax and will cause incorrect behavior. + * + * The IDENTIFIER('literal') syntax allows string literals to be used as identifiers at parse time + * (e.g., IDENTIFIER('my_col') resolves to the identifier my_col). If you use getText(), you'll + * get the raw text "IDENTIFIER('my_col')" instead of "my_col", breaking the feature. 
+ * + * Example: + * {{{ + * // WRONG - does not handle IDENTIFIER('literal'): + * val name = ctx.identifier.getText + * SubqueryAlias(ctx.name.getText, plan) + * + * // CORRECT - handles both regular identifiers and IDENTIFIER('literal'): + * val name = getIdentifierText(ctx.identifier) + * SubqueryAlias(getIdentifierText(ctx.name), plan) + * }}} */ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeErrorsBase { protected def typedVisit[T](ctx: ParseTree): T = { From 7fab3d4bdb3b7d5f9b6e9dd28b3619740c5dedd6 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 06:54:17 -0800 Subject: [PATCH 28/37] Comments by Daniel --- .../sql/catalyst/parser/DataTypeAstBuilder.scala | 11 +++-------- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +-- .../sql/catalyst/parser/SubstituteParamsParser.scala | 3 ++- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index d768bb155d2c..48a91f4e9c6e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -201,9 +201,8 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE } /** - * Parse a string into a multi-part identifier. This method is intended to be overridden by - * subclasses that have access to a full SQL parser. The base implementation simply returns the - * input as a single-part identifier. + * Parse a string into a multi-part identifier. Subclasses MUST implement this method to provide + * proper multi-part identifier parsing with access to a full SQL parser. * * For example, in AstBuilder, this would parse "`catalog`.`schema`.`table`" into Seq("catalog", * "schema", "table"). 
@@ -213,11 +212,7 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE * @return * Sequence of identifier parts. */ - protected def parseMultipartIdentifier(identifier: String): Seq[String] = { - // Base implementation: just return the string as a single part. - // Subclasses with access to a full parser should override this. - Seq(identifier) - } + protected def parseMultipartIdentifier(identifier: String): Seq[String] /** * Get the identifier parts from a context, handling both regular identifiers and diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 685906cabc70..0a42aed6c76f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3381,8 +3381,7 @@ class AstBuilder extends DataTypeAstBuilder expression(ctx.base) match { case unresolved_attr @ UnresolvedAttribute(nameParts) => // For regex check, we need the original text before identifier-lite resolution - val originalText = ctx.fieldName.getStart.getText - originalText match { + ctx.fieldName.getStart.getText match { case escapedIdentifier(columnNameRegex) if conf.supportQuotedRegexColumnName && isRegex(columnNameRegex) && canApplyRegex(ctx) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala index a615d012811a..9beead0e6487 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SubstituteParamsParser.scala @@ -205,7 +205,8 @@ class SubstituteParamsParser extends Logging { (result(substitution.start - 1) == '\'' || result(substitution.start - 1) == 
'"') && replacement.nonEmpty && !replacement(0).isWhitespace - result = prefix + (if (needsSpace) " " else "") + replacement + suffix + val space = if (needsSpace) " " else "" + result = s"$prefix$space$replacement$suffix" } result } From c20e2de26258c39b4a3cde1d378b3e99df4243c9 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 07:40:06 -0800 Subject: [PATCH 29/37] Fix compile error --- .../org/apache/spark/sql/catalyst/parser/parsers.scala | 10 +++++++++- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala index 281124306a50..3da0aac9fe24 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala @@ -430,7 +430,15 @@ case class UnclosedCommentProcessor(command: String, tokenStream: CommonTokenStr } object DataTypeParser extends AbstractParser { - override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder + override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder { + // DataTypeParser only parses data types, not full SQL. + // Multi-part identifiers should never appear in IDENTIFIER() within type definitions. + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + throw SparkException.internalError( + "DataTypeParser does not support multi-part identifiers in IDENTIFIER(). 
" + + s"Attempted to parse: $identifier") + } + } } object AbstractParser extends Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0a42aed6c76f..39129e3aec87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3419,8 +3419,7 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitColumnReference(ctx: ColumnReferenceContext): Expression = withOrigin(ctx) { // For regex check, we need the original text before identifier-lite resolution - val originalText = ctx.getStart.getText - originalText match { + ctx.getStart.getText match { case escapedIdentifier(columnNameRegex) if conf.supportQuotedRegexColumnName && isRegex(columnNameRegex) && canApplyRegex(ctx) => From 480b3ae2ddb3597c8130b713275c42b3e477efaa Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 08:52:30 -0800 Subject: [PATCH 30/37] Fix compile error --- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 48a91f4e9c6e..21fbc8daf102 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -87,7 +87,9 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, * SubqueryAlias(getIdentifierText(ctx.name), plan) * }}} */ -class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeErrorsBase { +abstract class DataTypeAstBuilder + extends SqlBaseParserBaseVisitor[AnyRef] + with 
DataTypeErrorsBase { protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } From 3593f5ab1e8e19b3b4b6d34a3a7528ede1d1f2be Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 09:09:03 -0800 Subject: [PATCH 31/37] Still fixing compoile errors --- .../apache/spark/sql/catalyst/parser/ParserUtils.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index b9ad20d4d2a9..d55010769842 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -58,8 +58,13 @@ object ParserUtils extends SparkParserUtils { * @return The resolved identifier text as a dot-separated string. */ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { - // Use DataTypeAstBuilder to properly extract identifier parts. - val astBuilder = new DataTypeAstBuilder() + // Create a minimal DataTypeAstBuilder instance for identifier extraction. + // We delegate parseMultipartIdentifier to CatalystSqlParser. 
+ val astBuilder = new DataTypeAstBuilder { + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + CatalystSqlParser.parseMultipartIdentifier(identifier) + } + } ctx.parts.asScala.flatMap { part => astBuilder.extractIdentifierParts(part) }.mkString(".") From 6f3e0609b4a10bd155117dcfe64730f8514acaef Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 10:02:58 -0800 Subject: [PATCH 32/37] simplify --- .../catalyst/parser/DataTypeAstBuilder.scala | 17 +++++++++++------ .../spark/sql/catalyst/parser/parsers.scala | 10 +--------- .../spark/sql/catalyst/parser/ParserUtils.scala | 9 ++------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 21fbc8daf102..d73064dc70a1 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -87,9 +87,7 @@ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, * SubqueryAlias(getIdentifierText(ctx.name), plan) * }}} */ -abstract class DataTypeAstBuilder - extends SqlBaseParserBaseVisitor[AnyRef] - with DataTypeErrorsBase { +class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeErrorsBase { protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } @@ -203,18 +201,25 @@ abstract class DataTypeAstBuilder } /** - * Parse a string into a multi-part identifier. Subclasses MUST implement this method to provide - * proper multi-part identifier parsing with access to a full SQL parser. + * Parse a string into a multi-part identifier. Subclasses should override this method to + * provide proper multi-part identifier parsing with access to a full SQL parser. 
* * For example, in AstBuilder, this would parse "`catalog`.`schema`.`table`" into Seq("catalog", * "schema", "table"). * + * The base implementation fails with an assertion to catch cases where multi-part identifiers + * are used without a proper parser implementation. + * * @param identifier * The identifier string to parse, potentially containing dots and backticks. * @return * Sequence of identifier parts. */ - protected def parseMultipartIdentifier(identifier: String): Seq[String] + protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + throw SparkException.internalError( + "parseMultipartIdentifier must be overridden by subclasses. " + + s"Attempted to parse: $identifier") + } /** * Get the identifier parts from a context, handling both regular identifiers and diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala index 3da0aac9fe24..281124306a50 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala @@ -430,15 +430,7 @@ case class UnclosedCommentProcessor(command: String, tokenStream: CommonTokenStr } object DataTypeParser extends AbstractParser { - override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder { - // DataTypeParser only parses data types, not full SQL. - // Multi-part identifiers should never appear in IDENTIFIER() within type definitions. - override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { - throw SparkException.internalError( - "DataTypeParser does not support multi-part identifiers in IDENTIFIER(). 
" + - s"Attempted to parse: $identifier") - } - } + override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder } object AbstractParser extends Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index d55010769842..b9ad20d4d2a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -58,13 +58,8 @@ object ParserUtils extends SparkParserUtils { * @return The resolved identifier text as a dot-separated string. */ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { - // Create a minimal DataTypeAstBuilder instance for identifier extraction. - // We delegate parseMultipartIdentifier to CatalystSqlParser. - val astBuilder = new DataTypeAstBuilder { - override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { - CatalystSqlParser.parseMultipartIdentifier(identifier) - } - } + // Use DataTypeAstBuilder to properly extract identifier parts. 
+ val astBuilder = new DataTypeAstBuilder() ctx.parts.asScala.flatMap { part => astBuilder.extractIdentifierParts(part) }.mkString(".") From ff191f00d87c32ee8f429c916693985dd3312b36 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Tue, 11 Nov 2025 12:37:56 -0800 Subject: [PATCH 33/37] Fix sql scripting regression --- .../org/apache/spark/sql/catalyst/parser/parsers.scala | 10 +++++++++- .../apache/spark/sql/catalyst/parser/ParserUtils.scala | 9 +++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala index 281124306a50..32270df0a988 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/parsers.scala @@ -430,7 +430,15 @@ case class UnclosedCommentProcessor(command: String, tokenStream: CommonTokenStr } object DataTypeParser extends AbstractParser { - override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder + override protected def astBuilder: DataTypeAstBuilder = new DataTypeAstBuilder { + // DataTypeParser only parses data types, not full SQL. + // Multi-part identifiers should not appear in IDENTIFIER() within type definitions. + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + throw SparkException.internalError( + "DataTypeParser does not support multi-part identifiers in IDENTIFIER(). 
" + + s"Attempted to parse: $identifier") + } + } } object AbstractParser extends Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index b9ad20d4d2a9..9df5ada12ac2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -58,8 +58,13 @@ object ParserUtils extends SparkParserUtils { * @return The resolved identifier text as a dot-separated string. */ def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { - // Use DataTypeAstBuilder to properly extract identifier parts. - val astBuilder = new DataTypeAstBuilder() + // Create a DataTypeAstBuilder that delegates multi-part identifier parsing to + // CatalystSqlParser. + val astBuilder = new DataTypeAstBuilder { + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + CatalystSqlParser.parseMultipartIdentifier(identifier) + } + } ctx.parts.asScala.flatMap { part => astBuilder.extractIdentifierParts(part) }.mkString(".") From 3e6304e3796943c0b5e519b534ca14bfe32328c0 Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 12 Nov 2025 08:35:54 -0800 Subject: [PATCH 34/37] Address comments by wenchen --- .../sql/catalyst/parser/SqlBaseParser.g4 | 1 - .../catalyst/parser/DataTypeAstBuilder.scala | 33 +++---------------- .../sql/catalyst/parser/AstBuilder.scala | 19 +++-------- .../spark/sql/execution/SparkStrategies.scala | 6 +--- .../identifier-clause.sql.out | 2 +- .../results/identifier-clause.sql.out | 2 +- .../apache/spark/sql/ParametersSuite.scala | 2 +- 7 files changed, 13 insertions(+), 52 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 
960b1867632d..cea29b939f3b 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1593,7 +1593,6 @@ qualifiedName // valid expressions such as "a-b" can be recognized as an identifier errorCapturingIdentifier : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase - | {!legacy_identifier_clause_only}? IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra #identifierLiteralWithExtra ; // extra left-factoring grammar diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index d73064dc70a1..cf7dca407197 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -242,12 +242,6 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE // Parse the string to handle qualified identifiers like "`cat`.`schema`". parseMultipartIdentifier(literalValue) - case idLitCtx: IdentifierLiteralWithExtraContext => - // For IDENTIFIER('literal') in errorCapturingIdentifier. - val literalValue = string(visitStringLit(idLitCtx.stringLit())) - // Parse the string to handle qualified identifiers like "`cat`.`schema`". - parseMultipartIdentifier(literalValue) - case base: ErrorCapturingIdentifierBaseContext => // Regular identifier with errorCapturingIdentifierExtra. // Need to recursively handle identifier which might itself be IDENTIFIER('literal'). 
@@ -284,29 +278,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE */ override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = withOrigin(ctx) { - ctx.parts.asScala.flatMap { part => - // Each part is an errorCapturingIdentifier, which can be either: - // 1. identifier errorCapturingIdentifierExtra (regular path) - labeled as - // #errorCapturingIdentifierBase - // 2. IDENTIFIER_KW LEFT_PAREN stringLit RIGHT_PAREN errorCapturingIdentifierExtra - // (identifier-lite path) - labeled as #identifierLiteralWithExtra - part match { - case idLitWithExtra: IdentifierLiteralWithExtraContext => - // This is identifier-lite: IDENTIFIER('string') - getIdentifierParts(idLitWithExtra) - case base: ErrorCapturingIdentifierBaseContext => - // Regular identifier path - val identifierCtx = base.identifier() - if (identifierCtx != null && identifierCtx.strictIdentifier() != null) { - getIdentifierParts(identifierCtx.strictIdentifier()) - } else { - Seq(part.getText) - } - case _ => - // Fallback for other cases - Seq(part.getText) - } - }.toSeq + // Each part is an errorCapturingIdentifier (which wraps identifier). + // getIdentifierParts recursively handles IDENTIFIER('literal') syntax through + // identifier -> strictIdentifier -> identifierLiteral. 
+ ctx.parts.asScala.flatMap(getIdentifierParts).toSeq } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 39129e3aec87..9892605dc372 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1986,17 +1986,18 @@ class AstBuilder extends DataTypeAstBuilder // this is needed to create unpivot and to filter unpivot for nulls further down val valueColumnNames = Option(ctx.unpivotOperator().unpivotSingleValueColumnClause()) - .map(_.unpivotValueColumn().identifier().getText) + .map(vc => getIdentifierText(vc.unpivotValueColumn().identifier())) .map(Seq(_)) .getOrElse( Option(ctx.unpivotOperator().unpivotMultiValueColumnClause()) - .map(_.unpivotValueColumns.asScala.map(_.identifier().getText).toSeq) + .map(_.unpivotValueColumns.asScala.map(vc => + getIdentifierText(vc.identifier())).toSeq) .get ) val unpivot = if (ctx.unpivotOperator().unpivotSingleValueColumnClause() != null) { val unpivotClause = ctx.unpivotOperator().unpivotSingleValueColumnClause() - val variableColumnName = unpivotClause.unpivotNameColumn().identifier().getText + val variableColumnName = getIdentifierText(unpivotClause.unpivotNameColumn().identifier()) val (unpivotColumns, unpivotAliases) = unpivotClause.unpivotColumns.asScala.map(visitUnpivotColumnAndAlias).toSeq.unzip @@ -2011,7 +2012,7 @@ class AstBuilder extends DataTypeAstBuilder ) } else { val unpivotClause = ctx.unpivotOperator().unpivotMultiValueColumnClause() - val variableColumnName = unpivotClause.unpivotNameColumn().identifier().getText + val variableColumnName = getIdentifierText(unpivotClause.unpivotNameColumn().identifier()) val (unpivotColumns, unpivotAliases) = unpivotClause.unpivotColumnSets.asScala.map(visitUnpivotColumnSet).toSeq.unzip @@ -2992,10 +2993,8 @@ class 
AstBuilder extends DataTypeAstBuilder } else { // If the parser is not in ansi mode, we should return `UnresolvedAttribute`, in case there // are columns named `CURRENT_DATE` or `CURRENT_TIMESTAMP` or `CURRENT_TIME`. - // scalastyle:off parser.gettext // ctx.name is a token, not an identifier context. UnresolvedAttribute.quoted(ctx.name.getText) - // scalastyle:on parser.gettext } } @@ -6465,18 +6464,14 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitTimestampadd(ctx: TimestampaddContext): Expression = withOrigin(ctx) { if (ctx.invalidUnit != null) { - // scalastyle:off parser.gettext // ctx.name and ctx.invalidUnit are tokens, not identifier contexts. throw QueryParsingErrors.invalidDatetimeUnitError( ctx, ctx.name.getText, ctx.invalidUnit.getText) - // scalastyle:on parser.gettext } else { - // scalastyle:off parser.gettext // ctx.unit is a token, not an identifier context. TimestampAdd(ctx.unit.getText, expression(ctx.unitsAmount), expression(ctx.timestamp)) - // scalastyle:on parser.gettext } } @@ -6485,18 +6480,14 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitTimestampdiff(ctx: TimestampdiffContext): Expression = withOrigin(ctx) { if (ctx.invalidUnit != null) { - // scalastyle:off parser.gettext // ctx.name and ctx.invalidUnit are tokens, not identifier contexts. throw QueryParsingErrors.invalidDatetimeUnitError( ctx, ctx.name.getText, ctx.invalidUnit.getText) - // scalastyle:on parser.gettext } else { - // scalastyle:off parser.gettext // ctx.unit is a token, not an identifier context. 
TimestampDiff(ctx.unit.getText, expression(ctx.startTimestamp), expression(ctx.endTimestamp)) - // scalastyle:on parser.gettext } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index ef6eec86f7ef..5efad83bcba7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -1125,11 +1125,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case r: NamedRelation => r.name case _ => - // Try to get name from SubqueryAlias before unwrapping - table match { - case logical.SubqueryAlias(name, _) => name.toString - case _ => "table" - } + "unknown" } } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 63d6f01e2926..fc8d225e2271 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -2144,7 +2144,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "sqlState" : "42601", "messageParameters" : { "error" : "'FROM'", - "hint" : "" + "hint" : ": missing ')'" } } diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 75fa8735a7e9..4a0f9abe5639 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -2356,7 +2356,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "sqlState" : "42601", "messageParameters" : { "error" : "'FROM'", - "hint" : "" + "hint" : ": missing ')'" } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala index 53e661f83d4c..40cfb54159d5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala @@ -2419,7 +2419,7 @@ class ParametersSuite extends QueryTest with SharedSparkSession { stop = 12)) } - test("detect unbound positional parameter with nop arguments") { + test("detect unbound positional parameter with no arguments") { val exception = intercept[AnalysisException] { spark.sql("SELECT ?") } From 8796d842c7eed81b603229d4a7faba1f873b23fe Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 12 Nov 2025 10:33:28 -0800 Subject: [PATCH 35/37] Address wenchen's comments. Reduce SQL Scripting labels, conditions, for loop label to strictIdentifier --- .../sql/catalyst/parser/SqlBaseParser.g4 | 12 +-- .../catalyst/parser/DataTypeAstBuilder.scala | 6 +- .../sql/catalyst/parser/AstBuilder.scala | 28 +++--- .../sql/catalyst/parser/ParserUtils.scala | 97 ++++++++----------- .../parser/SqlScriptingParserSuite.scala | 18 ++-- 5 files changed, 68 insertions(+), 93 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index cea29b939f3b..4099d5506e1f 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -99,7 +99,7 @@ sqlStateValue ; declareConditionStatement - : DECLARE multipartIdentifier CONDITION (FOR SQLSTATE VALUE? sqlStateValue)? + : DECLARE strictIdentifier CONDITION (FOR SQLSTATE VALUE? sqlStateValue)? 
; conditionValue @@ -132,11 +132,11 @@ repeatStatement ; leaveStatement - : LEAVE multipartIdentifier + : LEAVE strictIdentifier ; iterateStatement - : ITERATE multipartIdentifier + : ITERATE strictIdentifier ; caseStatement @@ -151,7 +151,7 @@ loopStatement ; forStatement - : beginLabel? FOR (multipartIdentifier AS)? query DO compoundBody END FOR endLabel? + : beginLabel? FOR (strictIdentifier AS)? query DO compoundBody END FOR endLabel? ; singleStatement @@ -159,11 +159,11 @@ singleStatement ; beginLabel - : multipartIdentifier COLON + : strictIdentifier COLON ; endLabel - : multipartIdentifier + : strictIdentifier ; singleExpression diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index cf7dca407197..a07a8171d114 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -207,8 +207,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE * For example, in AstBuilder, this would parse "`catalog`.`schema`.`table`" into Seq("catalog", * "schema", "table"). * - * The base implementation fails with an assertion to catch cases where multi-part identifiers - * are used without a proper parser implementation. + * This method is only called when parsing IDENTIFIER('literal') where the literal contains a + * qualified identifier (e.g., IDENTIFIER('schema.table')). Since DataTypeAstBuilder only parses + * data types (not full SQL with qualified table names), this should never be called in + * practice. The base implementation throws an error to catch unexpected usage. * * @param identifier * The identifier string to parse, potentially containing dots and backticks. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 9892605dc372..c32491648417 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -264,12 +264,6 @@ class AstBuilder extends DataTypeAstBuilder private def visitDeclareConditionStatementImpl( ctx: DeclareConditionStatementContext): ErrorCondition = { - // Qualified user defined condition name is not allowed. - if (ctx.multipartIdentifier().parts.size() > 1) { - throw SqlScriptingErrors - .conditionCannotBeQualified(CurrentOrigin.get, ctx.multipartIdentifier().getText) - } - // If SQLSTATE is not provided, default to 45000. val sqlState = Option(ctx.sqlStateValue()) .map(sqlStateValueContext => string(visitStringLit(sqlStateValueContext.stringLit()))) @@ -278,7 +272,7 @@ class AstBuilder extends DataTypeAstBuilder assertSqlState(sqlState) // Get condition name. 
- val conditionName = visitMultipartIdentifier(ctx.multipartIdentifier()).head + val conditionName = getIdentifierText(ctx.strictIdentifier()) assertConditionName(conditionName) @@ -570,15 +564,15 @@ class AstBuilder extends DataTypeAstBuilder val query = withOrigin(queryCtx) { SingleStatement(visitQuery(queryCtx)) } - parsingCtx.labelContext.enterForScope(Option(ctx.multipartIdentifier())) - val varName = Option(ctx.multipartIdentifier()).map(ParserUtils.getMultipartIdentifierText) + parsingCtx.labelContext.enterForScope(Option(ctx.strictIdentifier())) + val varName = Option(ctx.strictIdentifier()).map(getIdentifierText) val body = visitCompoundBodyImpl( ctx.compoundBody(), None, parsingCtx, isScope = false ) - parsingCtx.labelContext.exitForScope(Option(ctx.multipartIdentifier())) + parsingCtx.labelContext.exitForScope(Option(ctx.strictIdentifier())) parsingCtx.labelContext.exitLabeledScope(Option(ctx.beginLabel())) ForStatement(query, varName, body, Some(labelText)) @@ -589,26 +583,26 @@ class AstBuilder extends DataTypeAstBuilder ctx match { case c: BeginEndCompoundBlockContext if Option(c.beginLabel()).exists { b => - b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + getIdentifierText(b.strictIdentifier()).toLowerCase(Locale.ROOT).equals(label) } => if (isIterate) { throw SqlScriptingErrors.invalidIterateLabelUsageForCompound(CurrentOrigin.get, label) } true case c: WhileStatementContext if Option(c.beginLabel()).exists { b => - b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + getIdentifierText(b.strictIdentifier()).toLowerCase(Locale.ROOT).equals(label) } => true case c: RepeatStatementContext if Option(c.beginLabel()).exists { b => - b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + getIdentifierText(b.strictIdentifier()).toLowerCase(Locale.ROOT).equals(label) } => true case c: LoopStatementContext if Option(c.beginLabel()).exists { b => - 
b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + getIdentifierText(b.strictIdentifier()).toLowerCase(Locale.ROOT).equals(label) } => true case c: ForStatementContext if Option(c.beginLabel()).exists { b => - b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + getIdentifierText(b.strictIdentifier()).toLowerCase(Locale.ROOT).equals(label) } => true case _ => false } @@ -616,7 +610,7 @@ class AstBuilder extends DataTypeAstBuilder override def visitLeaveStatement(ctx: LeaveStatementContext): LeaveStatement = withOrigin(ctx) { - val labelText = ctx.multipartIdentifier().getText.toLowerCase(Locale.ROOT) + val labelText = getIdentifierText(ctx.strictIdentifier()).toLowerCase(Locale.ROOT) var parentCtx = ctx.parent while (Option(parentCtx).isDefined) { @@ -632,7 +626,7 @@ class AstBuilder extends DataTypeAstBuilder override def visitIterateStatement(ctx: IterateStatementContext): IterateStatement = withOrigin(ctx) { - val labelText = ctx.multipartIdentifier().getText.toLowerCase(Locale.ROOT) + val labelText = getIdentifierText(ctx.strictIdentifier()).toLowerCase(Locale.ROOT) var parentCtx = ctx.parent while (Option(parentCtx).isDefined) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 9df5ada12ac2..336db1382f89 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -20,7 +20,6 @@ import java.util import java.util.Locale import scala.collection.{immutable, mutable} -import scala.jdk.CollectionConverters._ import scala.util.matching.Regex import org.antlr.v4.runtime.{ParserRuleContext, Token} @@ -29,7 +28,7 @@ import org.antlr.v4.runtime.tree.{ParseTree, TerminalNodeImpl} import org.apache.spark.SparkException import 
org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier -import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{BeginLabelContext, EndLabelContext, MultipartIdentifierContext} +import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{BeginLabelContext, EndLabelContext, StrictIdentifierContext} import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable, ErrorCondition} import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.catalyst.util.SparkParserUtils @@ -48,28 +47,6 @@ object ParserUtils extends SparkParserUtils { throw QueryParsingErrors.invalidStatementError(statement, ctx) } - /** - * Gets the resolved text of a multipart identifier, handling IDENTIFIER('literal') syntax. - * This method properly traverses the parse tree structure to extract identifier literals, - * making it robust to comments, whitespace, and string coalescing. - * Uses DataTypeAstBuilder.extractIdentifierParts to reuse existing logic. - * - * @param ctx The multipart identifier context from the parse tree. - * @return The resolved identifier text as a dot-separated string. - */ - def getMultipartIdentifierText(ctx: MultipartIdentifierContext): String = { - // Create a DataTypeAstBuilder that delegates multi-part identifier parsing to - // CatalystSqlParser. - val astBuilder = new DataTypeAstBuilder { - override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { - CatalystSqlParser.parseMultipartIdentifier(identifier) - } - } - ctx.parts.asScala.flatMap { part => - astBuilder.extractIdentifierParts(part) - }.mkString(".") - } - def checkDuplicateClauses[T]( nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = { if (nodes.size() > 1) { @@ -302,36 +279,41 @@ class SqlScriptingLabelContext { * @param beginLabelCtx Begin label context. * @param endLabelCtx The end label context. 
*/ - private def checkLabels( - beginLabelCtx: Option[BeginLabelContext], - endLabelCtx: Option[EndLabelContext]): Unit = { - // First, check if the begin label is qualified (if it exists). - beginLabelCtx.foreach { bl => - val resolvedLabel = ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) - if (bl.multipartIdentifier().parts.size() > 1 || resolvedLabel.contains(".")) { - withOrigin(bl) { - throw SqlScriptingErrors.labelCannotBeQualified( - CurrentOrigin.get, - resolvedLabel.toLowerCase(Locale.ROOT) - ) - } + /** + * Get label text from label context, handling IDENTIFIER() syntax. + */ + private def getLabelText(ctx: ParserRuleContext): String = { + val astBuilder = new DataTypeAstBuilder { + override protected def parseMultipartIdentifier(identifier: String): Seq[String] = { + CatalystSqlParser.parseMultipartIdentifier(identifier) } } + val parts = astBuilder.extractIdentifierParts(ctx) + if (parts.size > 1) { + throw new ParseException( + errorClass = "IDENTIFIER_TOO_MANY_NAME_PARTS", + messageParameters = Map("identifier" -> parts.map(part => s"`$part`").mkString("."), + "limit" -> "1"), + ctx) + } + parts.head + } - // Then, check label matching and other constraints. + private def checkLabels( + beginLabelCtx: Option[BeginLabelContext], + endLabelCtx: Option[EndLabelContext]): Unit = { + // Check label matching and other constraints. (beginLabelCtx, endLabelCtx) match { // Throw an error if labels do not match. 
case (Some(bl: BeginLabelContext), Some(el: EndLabelContext)) => - val beginLabel = ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()) - .toLowerCase(Locale.ROOT) - val endLabel = ParserUtils.getMultipartIdentifierText(el.multipartIdentifier()) - .toLowerCase(Locale.ROOT) + val beginLabel = getLabelText(bl.strictIdentifier()).toLowerCase(Locale.ROOT) + val endLabel = getLabelText(el.strictIdentifier()).toLowerCase(Locale.ROOT) if (beginLabel != endLabel) { withOrigin(bl) { throw SqlScriptingErrors.labelsMismatch( CurrentOrigin.get, - ParserUtils.getMultipartIdentifierText(bl.multipartIdentifier()), - ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) + getLabelText(bl.strictIdentifier()), + getLabelText(el.strictIdentifier())) } } // Throw an error if end label exists without begin label. @@ -339,7 +321,7 @@ class SqlScriptingLabelContext { withOrigin(el) { throw SqlScriptingErrors.endLabelWithoutBeginLabel( CurrentOrigin.get, - ParserUtils.getMultipartIdentifierText(el.multipartIdentifier())) + getLabelText(el.strictIdentifier())) } case _ => } @@ -347,8 +329,7 @@ class SqlScriptingLabelContext { /** Check if the label is defined. */ private def isLabelDefined(beginLabelCtx: Option[BeginLabelContext]): Boolean = { - beginLabelCtx.map(ctx => - ParserUtils.getMultipartIdentifierText(ctx.multipartIdentifier())).isDefined + beginLabelCtx.isDefined } /** @@ -356,13 +337,13 @@ class SqlScriptingLabelContext { * If the identifier is contained within seenLabels, raise an exception. 
*/ private def assertIdentifierNotInSeenLabels( - identifierCtx: Option[MultipartIdentifierContext]): Unit = { + identifierCtx: Option[StrictIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ParserUtils.getMultipartIdentifierText(ctx) - if (seenLabels.contains(identifierName.toLowerCase(Locale.ROOT))) { + val identifierName = getLabelText(ctx).toLowerCase(Locale.ROOT) + if (seenLabels.contains(identifierName)) { withOrigin(ctx) { throw SqlScriptingErrors - .duplicateLabels(CurrentOrigin.get, identifierName.toLowerCase(Locale.ROOT)) + .duplicateLabels(CurrentOrigin.get, identifierName) } } } @@ -382,8 +363,7 @@ class SqlScriptingLabelContext { // Get label text and add it to seenLabels. val labelText = if (isLabelDefined(beginLabelCtx)) { - val txt = ParserUtils.getMultipartIdentifierText( - beginLabelCtx.get.multipartIdentifier()).toLowerCase(Locale.ROOT) + val txt = getLabelText(beginLabelCtx.get.strictIdentifier()).toLowerCase(Locale.ROOT) if (seenLabels.contains(txt)) { withOrigin(beginLabelCtx.get) { throw SqlScriptingErrors.duplicateLabels(CurrentOrigin.get, txt) @@ -409,8 +389,7 @@ class SqlScriptingLabelContext { */ def exitLabeledScope(beginLabelCtx: Option[BeginLabelContext]): Unit = { if (isLabelDefined(beginLabelCtx)) { - seenLabels.remove(ParserUtils.getMultipartIdentifierText( - beginLabelCtx.get.multipartIdentifier()).toLowerCase(Locale.ROOT)) + seenLabels.remove(getLabelText(beginLabelCtx.get.strictIdentifier()).toLowerCase(Locale.ROOT)) } } @@ -419,9 +398,9 @@ class SqlScriptingLabelContext { * If the for loop variable is defined, it will be asserted to not be inside seenLabels. * Then, if the for loop variable is defined, it will be added to seenLabels. 
*/ - def enterForScope(identifierCtx: Option[MultipartIdentifierContext]): Unit = { + def enterForScope(identifierCtx: Option[StrictIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ParserUtils.getMultipartIdentifierText(ctx) + val identifierName = getLabelText(ctx) assertIdentifierNotInSeenLabels(identifierCtx) seenLabels.add(identifierName.toLowerCase(Locale.ROOT)) @@ -439,9 +418,9 @@ class SqlScriptingLabelContext { * Exit a for loop scope. * If the for loop variable is defined, it will be removed from seenLabels. */ - def exitForScope(identifierCtx: Option[MultipartIdentifierContext]): Unit = { + def exitForScope(identifierCtx: Option[StrictIdentifierContext]): Unit = { identifierCtx.foreach { ctx => - val identifierName = ParserUtils.getMultipartIdentifierText(ctx) + val identifierName = getLabelText(ctx) seenLabels.remove(identifierName.toLowerCase(Locale.ROOT)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala index 298329db1ee3..8c4c9044248d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala @@ -2273,11 +2273,11 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { | END; |END""".stripMargin checkError( - exception = intercept[SqlScriptingException] { + exception = intercept[ParseException] { parsePlan(sqlScriptText) }, - condition = "INVALID_LABEL_USAGE.QUALIFIED_LABEL_NAME", - parameters = Map("labelName" -> "PART1.PART2")) + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'.'", "hint" -> "")) } test("qualified label name: label cannot be qualified + end label") { @@ -2288,11 +2288,11 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { | END part1.part2; 
|END""".stripMargin checkError( - exception = intercept[SqlScriptingException] { + exception = intercept[ParseException] { parsePlan(sqlScriptText) }, - condition = "INVALID_LABEL_USAGE.QUALIFIED_LABEL_NAME", - parameters = Map("labelName" -> "PART1.PART2")) + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'.'", "hint" -> "")) } test("unique label names: nested labeled scope statements") { @@ -2785,13 +2785,13 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { |BEGIN | DECLARE TEST.CONDITION CONDITION FOR SQLSTATE '12345'; |END""".stripMargin - val exception = intercept[SqlScriptingException] { + val exception = intercept[ParseException] { parsePlan(sqlScriptText) } checkError( exception = exception, - condition = "INVALID_ERROR_CONDITION_DECLARATION.QUALIFIED_CONDITION_NAME", - parameters = Map("conditionName" -> "TEST.CONDITION")) + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'FOR'", "hint" -> ": missing ';'")) assert(exception.origin.line.contains(3)) } From 281eb195de3dc9fab8c2a192a64217ceca8a166a Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 12 Nov 2025 16:24:08 -0800 Subject: [PATCH 36/37] Minor fixes --- .../org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 | 2 +- .../apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala | 4 ++-- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 4099d5506e1f..9d942bc60159 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1592,7 +1592,7 @@ qualifiedName // replace identifier with errorCapturingIdentifier where the immediate follow symbol is not an expression, otherwise // valid 
expressions such as "a-b" can be recognized as an identifier errorCapturingIdentifier - : identifier errorCapturingIdentifierExtra #errorCapturingIdentifierBase + : identifier errorCapturingIdentifierExtra ; // extra left-factoring grammar diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index a07a8171d114..212c80a3cb43 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -244,10 +244,10 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with DataTypeE // Parse the string to handle qualified identifiers like "`cat`.`schema`". parseMultipartIdentifier(literalValue) - case base: ErrorCapturingIdentifierBaseContext => + case errCapture: ErrorCapturingIdentifierContext => // Regular identifier with errorCapturingIdentifierExtra. // Need to recursively handle identifier which might itself be IDENTIFIER('literal'). - Option(base.identifier()) + Option(errCapture.identifier()) .flatMap(id => Option(id.strictIdentifier()).map(getIdentifierParts)) .getOrElse(Seq(ctx.getText)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c32491648417..4d2d35974969 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2553,8 +2553,7 @@ class AstBuilder extends DataTypeAstBuilder /** * Create a Sequence of Strings for an identifier list. - * Note: Each identifier in the list is kept as a single string, even if it's a qualified - * identifier-lite (e.g., IDENTIFIER('a.b') stays as "a.b", not split into parts). 
+ * Each identifier must be unqualified. Handles both regular identifiers and IDENTIFIER('literal'). */ override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) { ctx.ident.asScala.map(id => getIdentifierText(id)).toSeq From 16e57835552345cd858e08c55f408b8affb003af Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Wed, 12 Nov 2025 17:18:11 -0800 Subject: [PATCH 37/37] scala style --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4d2d35974969..d1d4a6b8c980 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2553,7 +2553,8 @@ class AstBuilder extends DataTypeAstBuilder /** * Create a Sequence of Strings for an identifier list. - * Each identifier must be unqualified. Handles both regular identifiers and IDENTIFIER('literal'). + * Each identifier must be unqualified. + * Handles both regular identifiers and IDENTIFIER('literal'). */ override def visitIdentifierSeq(ctx: IdentifierSeqContext): Seq[String] = withOrigin(ctx) { ctx.ident.asScala.map(id => getIdentifierText(id)).toSeq