From 3470b81dde7ffa7a76a6790c0fe5b3933ec4460a Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Fri, 14 Nov 2025 19:41:45 +0800 Subject: [PATCH 1/3] [SPARK-54350][STS] SparkGetColumnsOperation ORDINAL_POSITION should be 1-based --- .../hive/thriftserver/SparkGetColumnsOperation.scala | 2 +- .../thriftserver/SparkMetadataOperationSuite.scala | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 6c573ceb14ec..50ffa124cab3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -217,7 +217,7 @@ private[hive] class SparkGetColumnsOperation( null, // SQL_DATA_TYPE null, // SQL_DATETIME_SUB null, // CHAR_OCTET_LENGTH - pos.asInstanceOf[AnyRef], // ORDINAL_POSITION + (pos + 1).asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based "YES", // IS_NULLABLE null, // SCOPE_CATALOG null, // SCOPE_SCHEMA diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index abd2b1983b34..92ed52819767 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -341,7 +341,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { assert(rowSet.getInt("NULLABLE") === 1) assert(rowSet.getString("REMARKS") === pos.toString) - assert(rowSet.getInt("ORDINAL_POSITION") === pos) + 
assert(rowSet.getInt("ORDINAL_POSITION") === pos + 1) assert(rowSet.getString("IS_NULLABLE") === "YES") assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO") pos += 1 @@ -372,7 +372,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { assert(rowSet.getInt("NUM_PREC_RADIX") === 0) assert(rowSet.getInt("NULLABLE") === 0) assert(rowSet.getString("REMARKS") === "") - assert(rowSet.getInt("ORDINAL_POSITION") === 0) + assert(rowSet.getInt("ORDINAL_POSITION") === 1) assert(rowSet.getString("IS_NULLABLE") === "YES") assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO") } @@ -400,7 +400,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { assert(rowSet.getInt("NUM_PREC_RADIX") === 0) assert(rowSet.getInt("NULLABLE") === 0) assert(rowSet.getString("REMARKS") === "") - assert(rowSet.getInt("ORDINAL_POSITION") === 0) + assert(rowSet.getInt("ORDINAL_POSITION") === 1) assert(rowSet.getString("IS_NULLABLE") === "YES") assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO") } @@ -426,7 +426,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { assert(rowSet.getInt("NUM_PREC_RADIX") === 0) assert(rowSet.getInt("NULLABLE") === 0) assert(rowSet.getString("REMARKS") === "") - assert(rowSet.getInt("ORDINAL_POSITION") === 0) + assert(rowSet.getInt("ORDINAL_POSITION") === 1) assert(rowSet.getString("IS_NULLABLE") === "YES") assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO") } @@ -453,7 +453,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { assert(rowSet.getInt("NUM_PREC_RADIX") === 0) assert(rowSet.getInt("NULLABLE") === 1) assert(rowSet.getString("REMARKS") === "") - assert(rowSet.getInt("ORDINAL_POSITION") === 0) + assert(rowSet.getInt("ORDINAL_POSITION") === 1) assert(rowSet.getString("IS_NULLABLE") === "YES") assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO") } @@ -680,7 +680,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { 
assert(rowSet.getInt("DECIMAL_DIGITS") === 6) assert(rowSet.getInt("NUM_PREC_RADIX") === 0) assert(rowSet.getInt("NULLABLE") === 0) - assert(rowSet.getInt("ORDINAL_POSITION") === idx) + assert(rowSet.getInt("ORDINAL_POSITION") === idx + 1) idx += 1 } } From c719d72a32f8b56abefe65ebcd581befbd59c2a9 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 18 Nov 2025 15:28:31 +0800 Subject: [PATCH 2/3] useZeroBasedColumnOrdinalPosition config --- docs/sql-migration-guide.md | 1 + .../SparkGetColumnsOperation.scala | 8 +++++- .../SparkMetadataOperationSuite.scala | 25 +++++++++++++++++++ .../org/apache/spark/sql/hive/HiveUtils.scala | 8 ++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index e5becac54032..0a2533d28f0b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -25,6 +25,7 @@ license: | ## Upgrading from Spark SQL 4.0 to 4.1 - Since Spark 4.1, the Parquet reader no longer assumes all struct values to be null, if all the requested fields are missing in the parquet file. The new default behavior is to read an additional struct field that is present in the file to determine nullness. To restore the previous behavior, set `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` to `true`. +- Since Spark 4.1, the Spark Thrift Server returns the correct 1-based ORDINAL_POSITION in the result of the GetColumns operation, instead of the incorrect 0-based one. To restore the previous behavior, set `spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition` to `true`.
## Upgrading from Spark SQL 3.5 to 4.0 diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 50ffa124cab3..bdfd84e9da5f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -31,6 +31,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.types._ /** @@ -200,6 +201,11 @@ private[hive] class SparkGetColumnsOperation( schema.zipWithIndex.foreach { case (column, pos) => if (columnPattern != null && !columnPattern.matcher(column.name).matches()) { } else { + val ordinal = if (session.conf.get(HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL)) { + pos + } else { + pos + 1 + } val rowData = Array[AnyRef]( null, // TABLE_CAT dbName, // TABLE_SCHEM @@ -217,7 +223,7 @@ private[hive] class SparkGetColumnsOperation( null, // SQL_DATA_TYPE null, // SQL_DATETIME_SUB null, // CHAR_OCTET_LENGTH - (pos + 1).asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based + ordinal.asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based (0-based when the legacy conf is enabled) "YES", // IS_NULLABLE null, // SCOPE_CATALOG null, // SCOPE_SCHEMA diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 92ed52819767..1c50c0dfafb3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -24,6 +24,7 @@ import org.apache.hive.service.cli.HiveSQLException import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.catalyst.analysis.FunctionRegistry +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.VersionUtils @@ -685,4 +686,28 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { } } } + + test("SPARK-54350: SparkGetColumnsOperation respects useZeroBasedColumnOrdinalPosition config") { + Seq(true, false).foreach { zeroBasedOrdinal => + val tableName = "column_ordinal_position" + val ddl = s"CREATE TABLE $tableName (id INT, name STRING)" + + withJdbcStatement(tableName) { statement => + statement.execute( + s"SET ${HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL.key}=$zeroBasedOrdinal") + statement.execute(ddl) + val data = statement.getConnection.getMetaData + val rowSet = data.getColumns("", "", tableName, null) + assert(rowSet.next()) + assert(rowSet.getString("TABLE_NAME") === tableName) + assert(rowSet.getString("COLUMN_NAME") === "id") + assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 0 else 1)) + assert(rowSet.next()) + assert(rowSet.getString("TABLE_NAME") === tableName) + assert(rowSet.getString("COLUMN_NAME") === "name") + assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 1 else 2)) + assert(!rowSet.next()) + } + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index ac346a5b3ecf..f0e1e208e542 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -222,6 +222,14 @@ private[spark] object HiveUtils extends Logging { .booleanConf .createWithDefault(true) + val 
LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL = buildConf("spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition") .doc("When set to true, Hive Thrift server returns 0-based ORDINAL_POSITION in the " + "result of the GetColumns operation, instead of the correct 1-based one.") .version("4.1.0") .booleanConf .createWithDefault(false) + val USE_DELEGATE_FOR_SYMLINK_TEXT_INPUT_FORMAT = buildConf("spark.sql.hive.useDelegateForSymlinkTextInputFormat") .internal() From 558392abe73e196135924f3c21bb63ef369a8942 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 18 Nov 2025 16:06:13 +0800 Subject: [PATCH 3/3] use view for testing --- .../SparkMetadataOperationSuite.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 1c50c0dfafb3..a10d2974db74 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -689,21 +689,24 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { test("SPARK-54350: SparkGetColumnsOperation respects useZeroBasedColumnOrdinalPosition config") { Seq(true, false).foreach { zeroBasedOrdinal => - val tableName = "column_ordinal_position" - val ddl = s"CREATE TABLE $tableName (id INT, name STRING)" + val viewName = "view_column_ordinal_position" + val ddl = s"CREATE OR REPLACE GLOBAL TEMPORARY VIEW $viewName AS " + + "SELECT 1 AS id, 'foo' AS name" - withJdbcStatement(tableName) { statement => + withJdbcStatement(viewName) { statement => statement.execute( s"SET ${HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL.key}=$zeroBasedOrdinal") statement.execute(ddl) val data =
statement.getConnection.getMetaData - val rowSet = data.getColumns("", "", tableName, null) + val rowSet = data.getColumns("", "global_temp", viewName, null) assert(rowSet.next()) - assert(rowSet.getString("TABLE_NAME") === tableName) + assert(rowSet.getString("TABLE_SCHEM") === "global_temp") + assert(rowSet.getString("TABLE_NAME") === viewName) assert(rowSet.getString("COLUMN_NAME") === "id") assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 0 else 1)) assert(rowSet.next()) - assert(rowSet.getString("TABLE_NAME") === tableName) + assert(rowSet.getString("TABLE_SCHEM") === "global_temp") + assert(rowSet.getString("TABLE_NAME") === viewName) assert(rowSet.getString("COLUMN_NAME") === "name") assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 1 else 2)) assert(!rowSet.next())