diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index e5becac54032..0a2533d28f0b 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -25,6 +25,7 @@ license: |
 ## Upgrading from Spark SQL 4.0 to 4.1
 
 - Since Spark 4.1, the Parquet reader no longer assumes all struct values to be null, if all the requested fields are missing in the parquet file. The new default behavior is to read an additional struct field that is present in the file to determine nullness. To restore the previous behavior, set `spark.sql.legacy.parquet.returnNullStructIfAllFieldsMissing` to `true`.
+- Since Spark 4.1, the Spark Thrift Server returns the correct 1-based ORDINAL_POSITION in the result of the GetColumns operation, instead of the incorrect 0-based value it returned previously. To restore the previous behavior, set `spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition` to `true`.
 
 ## Upgrading from Spark SQL 3.5 to 4.0
 
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
index 6c573ceb14ec..bdfd84e9da5f 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala
@@ -31,6 +31,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.internal.LogKeys._
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.types._
 
 /**
@@ -200,6 +201,11 @@ private[hive] class SparkGetColumnsOperation(
     schema.zipWithIndex.foreach { case (column, pos) =>
       if (columnPattern != null && !columnPattern.matcher(column.name).matches()) {
       } else {
+        val ordinal = if (session.conf.get(HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL)) {
+          pos
+        } else {
+          pos + 1
+        }
         val rowData = Array[AnyRef](
           null, // TABLE_CAT
           dbName, // TABLE_SCHEM
@@ -217,7 +223,7 @@ private[hive] class SparkGetColumnsOperation(
           null, // SQL_DATA_TYPE
           null, // SQL_DATETIME_SUB
           null, // CHAR_OCTET_LENGTH
-          pos.asInstanceOf[AnyRef], // ORDINAL_POSITION
+          ordinal.asInstanceOf[AnyRef], // ORDINAL_POSITION, 1-based (0-based if legacy conf set)
           "YES", // IS_NULLABLE
           null, // SCOPE_CATALOG
           null, // SCOPE_SCHEMA
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
index abd2b1983b34..a10d2974db74 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala
@@ -24,6 +24,7 @@ import org.apache.hive.service.cli.HiveSQLException
 
 import org.apache.spark.SPARK_VERSION
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.util.VersionUtils
@@ -341,7 +342,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
         assert(rowSet.getInt("NULLABLE") === 1)
         assert(rowSet.getString("REMARKS") === pos.toString)
-        assert(rowSet.getInt("ORDINAL_POSITION") === pos)
+        assert(rowSet.getInt("ORDINAL_POSITION") === pos + 1)
         assert(rowSet.getString("IS_NULLABLE") === "YES")
         assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
         pos += 1
@@ -372,7 +373,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
       assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
       assert(rowSet.getInt("NULLABLE") === 0)
       assert(rowSet.getString("REMARKS") === "")
-      assert(rowSet.getInt("ORDINAL_POSITION") === 0)
+      assert(rowSet.getInt("ORDINAL_POSITION") === 1)
       assert(rowSet.getString("IS_NULLABLE") === "YES")
       assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
     }
@@ -400,7 +401,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
       assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
      assert(rowSet.getInt("NULLABLE") === 0)
       assert(rowSet.getString("REMARKS") === "")
-      assert(rowSet.getInt("ORDINAL_POSITION") === 0)
+      assert(rowSet.getInt("ORDINAL_POSITION") === 1)
       assert(rowSet.getString("IS_NULLABLE") === "YES")
       assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
     }
@@ -426,7 +427,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
       assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
       assert(rowSet.getInt("NULLABLE") === 0)
       assert(rowSet.getString("REMARKS") === "")
-      assert(rowSet.getInt("ORDINAL_POSITION") === 0)
+      assert(rowSet.getInt("ORDINAL_POSITION") === 1)
       assert(rowSet.getString("IS_NULLABLE") === "YES")
       assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
     }
@@ -453,7 +454,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
       assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
       assert(rowSet.getInt("NULLABLE") === 1)
       assert(rowSet.getString("REMARKS") === "")
-      assert(rowSet.getInt("ORDINAL_POSITION") === 0)
+      assert(rowSet.getInt("ORDINAL_POSITION") === 1)
       assert(rowSet.getString("IS_NULLABLE") === "YES")
       assert(rowSet.getString("IS_AUTO_INCREMENT") === "NO")
     }
@@ -680,9 +681,36 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase {
         assert(rowSet.getInt("DECIMAL_DIGITS") === 6)
         assert(rowSet.getInt("NUM_PREC_RADIX") === 0)
         assert(rowSet.getInt("NULLABLE") === 0)
-        assert(rowSet.getInt("ORDINAL_POSITION") === idx)
+        assert(rowSet.getInt("ORDINAL_POSITION") === idx + 1)
         idx += 1
       }
     }
   }
+
+  test("SPARK-54350: SparkGetColumnsOperation respects useZeroBasedColumnOrdinalPosition config") {
+    Seq(true, false).foreach { zeroBasedOrdinal =>
+      val viewName = "view_column_ordinal_position"
+      val ddl = s"CREATE OR REPLACE GLOBAL TEMPORARY VIEW $viewName AS " +
+        "SELECT 1 AS id, 'foo' AS name"
+
+      withJdbcStatement(viewName) { statement =>
+        statement.execute(
+          s"SET ${HiveUtils.LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL.key}=$zeroBasedOrdinal")
+        statement.execute(ddl)
+        val data = statement.getConnection.getMetaData
+        val rowSet = data.getColumns("", "global_temp", viewName, null)
+        assert(rowSet.next())
+        assert(rowSet.getString("TABLE_SCHEM") === "global_temp")
+        assert(rowSet.getString("TABLE_NAME") === viewName)
+        assert(rowSet.getString("COLUMN_NAME") === "id")
+        assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 0 else 1))
+        assert(rowSet.next())
+        assert(rowSet.getString("TABLE_SCHEM") === "global_temp")
+        assert(rowSet.getString("TABLE_NAME") === viewName)
+        assert(rowSet.getString("COLUMN_NAME") === "name")
+        assert(rowSet.getInt("ORDINAL_POSITION") === (if (zeroBasedOrdinal) 1 else 2))
+        assert(!rowSet.next())
+      }
+    }
+  }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index ac346a5b3ecf..f0e1e208e542 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -222,6 +222,14 @@ private[spark] object HiveUtils extends Logging {
     .booleanConf
     .createWithDefault(true)
 
+  val LEGACY_STS_ZERO_BASED_COLUMN_ORDINAL =
+    buildConf("spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition")
+      .doc("When set to true, Hive Thrift server returns the legacy 0-based ORDINAL_POSITION " +
+        "in the result of the GetColumns operation, instead of the correct 1-based value.")
+      .version("4.1.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val USE_DELEGATE_FOR_SYMLINK_TEXT_INPUT_FORMAT =
     buildConf("spark.sql.hive.useDelegateForSymlinkTextInputFormat")
       .internal()
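
For context: the JDBC spec (`java.sql.DatabaseMetaData.getColumns`) defines ORDINAL_POSITION as "index of column in table (starting at 1)", which is what this patch aligns the Thrift Server with. Below is a minimal client-side sketch of how the change is observed, assuming a running Spark Thrift Server at the default `localhost:10000` endpoint and the Hive JDBC driver on the classpath; the view and column names are illustrative, mirroring the added test:

```scala
import java.sql.DriverManager

// Assumes a running Spark Thrift Server at the default endpoint and the
// Hive JDBC driver on the classpath.
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
val stmt = conn.createStatement()
stmt.execute("CREATE OR REPLACE GLOBAL TEMPORARY VIEW t AS SELECT 1 AS id, 'foo' AS name")

// getColumns is served by SparkGetColumnsOperation on the server side.
val cols = conn.getMetaData.getColumns("", "global_temp", "t", null)
while (cols.next()) {
  // Since Spark 4.1: id -> 1, name -> 2 (Spark 4.0 and earlier: 0 and 1).
  println(s"${cols.getString("COLUMN_NAME")} -> ${cols.getInt("ORDINAL_POSITION")}")
}
```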
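Continuing the same sketch: the escape hatch added in HiveUtils.scala is a session-scoped SQL conf, so a client can flip it with a plain SET statement, exactly as the new test does; subsequent GetColumns calls on that session then revert to the pre-4.1, 0-based numbering:

```scala
// Opt back into the legacy numbering for this session only.
stmt.execute(
  "SET spark.sql.legacy.hive.thriftServer.useZeroBasedColumnOrdinalPosition=true")

val legacyCols = conn.getMetaData.getColumns("", "global_temp", "t", null)
while (legacyCols.next()) {
  // Now: id -> 0, name -> 1, matching the pre-4.1 behavior.
  println(s"${legacyCols.getString("COLUMN_NAME")} -> ${legacyCols.getInt("ORDINAL_POSITION")}")
}
conn.close()
```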