diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index d45174425f47..977efa8f2433 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -130,7 +130,7 @@ The following options can be used to configure the version of Hive that is used 2.3.10 Version of the Hive metastore. Available - options are 2.0.0 through 2.3.10, 3.0.0 through 3.1.3, and 4.0.0 through 4.0.1. + options are 2.0.0 through 2.3.10, 3.0.0 through 3.1.3, and 4.0.0 through 4.1.0. 1.4.0 diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index edc0a61d8f1d..d199dc31c58b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -1059,7 +1059,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 2.0.0 to 2.3.10 and 3.0.0 to 3.1.3. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 2.0.0 to 2.3.10 and 3.0.0 to 4.1.0. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 11a1f1166e5e..253ee81b31b6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -76,7 +76,7 @@ private[spark] object HiveUtils extends Logging { .doc("Version of the Hive metastore. 
Available options are " + "2.0.0 through 2.3.10, " + "3.0.0 through 3.1.3 and " + - "4.0.0 through 4.0.1.") + "4.0.0 through 4.1.0.") .version("1.4.0") .stringConf .checkValue(isCompatibleHiveVersion, "Unsupported Hive Metastore version") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 8f7b892cf83b..9389bb425492 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -127,6 +127,7 @@ private[hive] class HiveClientImpl( case hive.v3_0 => new Shim_v3_0() case hive.v3_1 => new Shim_v3_1() case hive.v4_0 => new Shim_v4_0() + case hive.v4_1 => new Shim_v4_1() } // Create an internal session state for this HiveClientImpl. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index d3eb797c103d..24c5cfb72c7e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -1543,3 +1543,5 @@ private[client] class Shim_v4_0 extends Shim_v3_1 { renamePartitionMethod.invoke(hive, table, oldPartSpec, newPart, writeIdInLoadTableOrPartition) } } + +private[client] class Shim_v4_1 extends Shim_v4_0 diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 56c1d402e1b2..1adc2f623b76 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -98,6 +98,7 @@ private[hive] object IsolatedClientLoader extends Logging { case (3, 0, _) => Some(hive.v3_0) case (3, 1, _) => 
Some(hive.v3_1) case (4, 0, _) => Some(hive.v4_0) + case (4, 1, _) => Some(hive.v4_1) case _ => None }.getOrElse { throw QueryExecutionErrors.unsupportedHiveMetastoreVersionError( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 6a9815342e73..d7a0c58b4016 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -101,8 +101,25 @@ package object client { "org.pentaho:pentaho-aggdesigner-algorithm", "org.apache.hive:hive-vector-code-gen")) + case object v4_1 extends HiveVersion("4.1.0", + extraDeps = + "org.antlr:antlr4-runtime:4.9.3" :: + "org.apache.derby:derby:10.14.1.0" :: + "org.apache.hadoop:hadoop-hdfs:3.4.1" :: + "org.datanucleus:datanucleus-api-jdo:6.0.5" :: + "org.datanucleus:datanucleus-core:6.0.11" :: + "org.datanucleus:datanucleus-rdbms:6.0.10" :: + "org.datanucleus:javax.jdo:3.2.1" :: + "org.springframework:spring-core:5.3.39" :: + "org.springframework:spring-jdbc:5.3.39" :: Nil, + exclusions = + "org.apache.curator:*" :: + "org.apache.hive:hive-service-rpc" :: + "org.apache.tez:tez-api" :: + "org.apache.zookeeper:zookeeper" :: Nil) + val allSupportedHiveVersions: Set[HiveVersion] = - Set(v2_0, v2_1, v2_2, v2_3, v3_0, v3_1, v4_0) + Set(v2_0, v2_1, v2_2, v2_3, v3_0, v3_1, v4_0, v4_1) } // scalastyle:on diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 355fece722b3..d0d848bcb562 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -45,6 +45,8 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { private val emptyDir = Utils.createTempDir().getCanonicalPath + 
private val ver = IsolatedClientLoader.hiveVersion(version) + /** * Drops table `tableName` after calling `f`. */ @@ -166,7 +168,7 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { // test alter database location val tempDatabasePath2 = Utils.createTempDir().toURI // Hive support altering database location since HIVE-8472. - if (version == "3.0" || version == "3.1" || version == "4.0") { + if (ver.compare(hive.v3_0) >= 0) { client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) val uriInCatalog = client.getDatabase("temporary").locationUri assert("file" === uriInCatalog.getScheme) @@ -336,7 +338,7 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { } test("listTables(database)") { - assert(client.listTables("default") === Seq("src", "temporary", "view1")) + assert((client.listTables("default") diff Seq("src", "temporary", "view1")) === Nil) } test("listTables(database, pattern)") { @@ -579,7 +581,7 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { test("sql create index and reset") { // HIVE-18448 Since Hive 3.0, INDEX is not supported. 
- if (version != "3.0" && version != "3.1" && version != "4.0") { + if (ver.compare(hive.v3_0) < 0) { client.runSqlHive("CREATE TABLE indexed_table (key INT)") client.runSqlHive("CREATE INDEX index_1 ON TABLE indexed_table(key) " + "as 'COMPACT' WITH DEFERRED REBUILD") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala index f54760e44b96..c06e2dea40f9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala @@ -22,6 +22,6 @@ private[client] trait HiveClientVersions { protected val versions = if (testVersions.nonEmpty) { testVersions.get.split(",").map(_.trim).filter(_.nonEmpty).toIndexedSeq } else { - IndexedSeq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1", "4.0") + IndexedSeq("2.0", "2.1", "2.2", "2.3", "3.0", "3.1", "4.0", "4.1") } }