From 79b147ad3a3562ea91531a9c5583c03a97a455a7 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 11 Oct 2023 10:18:50 +0800 Subject: [PATCH 01/33] [KYUUBI #5362] Remove Spark 3.0 support for Authz ### _Why are the changes needed?_ To close #5362. Considering the maintenance burden on the Kyuubi community and the ease of cross-support for data lake projects, drop support for the EOL releases of Spark 3 ahead of the coming Spark 4.x era in Kyuubi v1.9. We will still do bugfix releases for Spark 3.0.x users. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5363 from AngersZhuuuu/KYUUBI-5362. Closes #5362 d34cd6e2b [Angerszhuuuu] Update build.md 99f414bd4 [Angerszhuuuu] Update build.md a5129e4f6 [Angerszhuuuu] Update build.md 6ee008cc5 [Angerszhuuuu] Update README.md af792cc42 [Angerszhuuuu] Update master.yml 69b333161 [Angerszhuuuu] Merge branch 'master' into KYUUBI-5362 528554e9c [Angerszhuuuu] Update IcebergCatalogPrivilegesBuilderSuite.scala 427ebd48d [Angerszhuuuu] Update DataMaskingForJDBCV2Suite.scala 64809a54f [Angerszhuuuu] update f7d89fd9b [Angerszhuuuu] [KYUUBI-5362] Kyuubi remove Authz test for spark3.0.3 Authored-by: Angerszhuuuu Signed-off-by: Cheng Pan --- .github/workflows/master.yml | 43 ------------ docs/security/authorization/spark/build.md | 2 +- extensions/spark/kyuubi-spark-authz/README.md | 2 +- ...IcebergCatalogPrivilegesBuilderSuite.scala | 22 +++---- .../spark/authz/PrivilegesBuilderSuite.scala | 1 - ...bcTableCatalogPrivilegesBuilderSuite.scala | 18 ++--- ...bergCatalogRangerSparkExtensionSuite.scala | 53 +++++++-------- .../ranger/RangerSparkExtensionSuite.scala | 20 +----- ...ableCatalogRangerSparkExtensionSuite.scala | 65 ++++++------------- .../DataMaskingForIcebergSuite.scala | 34 ++++------ .../DataMaskingForJDBCV2Suite.scala | 37 ++++------- .../datamasking/DataMaskingTestBase.scala | 2 - .../RowFilteringForIcebergSuite.scala | 34 ++++------ .../RowFilteringForJDBCV2Suite.scala | 37 ++++------- .../rowfiltering/RowFilteringTestBase.scala | 2 - 15 files changed, 113 insertions(+), 259 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7c442dd0f48..c3cc537366e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -127,49 +127,6 @@ jobs: **/kyuubi-spark-sql-engine.log* **/kyuubi-spark-batch-submit.log* - authz: - name: Kyuubi-AuthZ and Spark Test - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - java: - - 8 - - 11 - spark: - - '3.0.3' - comment: ["normal"] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - name: Tune Runner VM - uses: ./.github/actions/tune-runner-vm - - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 - with: - distribution: temurin - java-version: ${{ matrix.java }} - cache: 'maven' - check-latest: false - - name: Setup Maven - uses: ./.github/actions/setup-maven - - name: Cache Engine Archives - uses: ./.github/actions/cache-engine-archives - - name: Build and test Kyuubi AuthZ with supported Spark versions - run: | - TEST_MODULES="extensions/spark/kyuubi-spark-authz" - ./build/mvn clean test ${MVN_OPT} -pl ${TEST_MODULES} -am \ - -Dspark.version=${{ matrix.spark }} - -
name: Upload test logs - if: failure() - uses: actions/upload-artifact@v3 - with: - name: unit-tests-log-java-${{ matrix.java }}-spark-${{ matrix.spark }}-${{ matrix.comment }} - path: | - **/target/unit-tests.log - **/kyuubi-spark-sql-engine.log* - scala-test: name: Scala Test runs-on: ubuntu-22.04 diff --git a/docs/security/authorization/spark/build.md b/docs/security/authorization/spark/build.md index 7e38f2eed19..aa7fc18da1c 100644 --- a/docs/security/authorization/spark/build.md +++ b/docs/security/authorization/spark/build.md @@ -51,7 +51,7 @@ The available `spark.version`s are shown in the following table. | 3.3.x | √ | - | | 3.2.x | √ | - | | 3.1.x | √ | - | -| 3.0.x | √ | - | +| 3.0.x | x | EOL since v1.9.0 | | 2.4.x and earlier | × | [PR 2367](https://github.com/apache/kyuubi/pull/2367) is used to track how we work with older releases with scala 2.11 | Currently, Spark released with Scala 2.12 are supported. diff --git a/extensions/spark/kyuubi-spark-authz/README.md b/extensions/spark/kyuubi-spark-authz/README.md index 374f83b0379..9657b5b7a5c 100644 --- a/extensions/spark/kyuubi-spark-authz/README.md +++ b/extensions/spark/kyuubi-spark-authz/README.md @@ -38,7 +38,7 @@ build/mvn clean package -DskipTests -pl :kyuubi-spark-authz_2.12 -am -Dspark.ver - [x] 3.3.x - [x] 3.2.x - [x] 3.1.x -- [x] 3.0.x +- [ ] 3.0.x - [ ] 2.4.x and earlier ### Supported Apache Ranger Versions diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala index 45186e2502d..39966af916a 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala @@ -22,7 +22,6 @@ import org.scalatest.Outcome import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.OperationType._ import org.apache.kyuubi.plugin.spark.authz.ranger.AccessType -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.tags.IcebergTest import org.apache.kyuubi.util.AssertionUtils._ @@ -30,9 +29,7 @@ import org.apache.kyuubi.util.AssertionUtils._ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { override protected val catalogImpl: String = "hive" override protected val sqlExtensions: String = - if (isSparkV31OrGreater) { - "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" - } else "" + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" override protected def format = "iceberg" override protected val supportsUpdateTable = false @@ -42,20 +39,17 @@ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { override protected val supportsPartitionManagement = false override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.iceberg.spark.SparkCatalog") - spark.conf.set(s"spark.sql.catalog.$catalogV2.type", "hadoop") - spark.conf.set( - s"spark.sql.catalog.$catalogV2.warehouse", - Utils.createTempDir("iceberg-hadoop").toString) - } + spark.conf.set( + s"spark.sql.catalog.$catalogV2", + "org.apache.iceberg.spark.SparkCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2.type", "hadoop") + spark.conf.set( + 
s"spark.sql.catalog.$catalogV2.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) super.beforeAll() } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala index 723fabd7b67..54b91eb2837 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala @@ -662,7 +662,6 @@ abstract class PrivilegesBuilderSuite extends AnyFunSuite } test("RefreshFunctionCommand") { - assume(isSparkV31OrGreater) sql(s"CREATE FUNCTION RefreshFunctionCommand AS '${getClass.getCanonicalName}'") val plan = sql("REFRESH FUNCTION RefreshFunctionCommand") .queryExecution.analyzed diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala index 1037d9811ee..4fe13201d87 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala @@ -23,7 +23,6 @@ import scala.util.Try import org.scalatest.Outcome import org.apache.kyuubi.plugin.spark.authz.serde._ -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.util.AssertionUtils._ class V2JdbcTableCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { @@ -39,15 +38,13 @@ class V2JdbcTableCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite val jdbcUrl: String = s"$dbUrl;create=true" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") - spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) - spark.conf.set( - s"spark.sql.catalog.$catalogV2.driver", - "org.apache.derby.jdbc.AutoloadedDriver") - } + spark.conf.set( + s"spark.sql.catalog.$catalogV2", + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) + spark.conf.set( + s"spark.sql.catalog.$catalogV2.driver", + "org.apache.derby.jdbc.AutoloadedDriver") super.beforeAll() } @@ -61,7 +58,6 @@ class V2JdbcTableCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala index 55fde3b685b..e33fbb7a3df 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala +++ 
b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala @@ -37,9 +37,7 @@ import org.apache.kyuubi.util.AssertionUtils._ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { override protected val catalogImpl: String = "hive" override protected val sqlExtensions: String = - if (isSparkV31OrGreater) - "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" - else "" + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" val catalogV2 = "local" val namespace1 = icebergNamespace @@ -47,37 +45,34 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite val outputTable1 = "outputTable1" override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.iceberg.spark.SparkCatalog") - spark.conf.set(s"spark.sql.catalog.$catalogV2.type", "hadoop") - spark.conf.set( - s"spark.sql.catalog.$catalogV2.warehouse", - Utils.createTempDir("iceberg-hadoop").toString) - - super.beforeAll() - - doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $catalogV2.$namespace1")) - doAs( - admin, - sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$table1" + - " (id int, name string, city string) USING iceberg")) + spark.conf.set( + s"spark.sql.catalog.$catalogV2", + "org.apache.iceberg.spark.SparkCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2.type", "hadoop") + spark.conf.set( + s"spark.sql.catalog.$catalogV2.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) - doAs( - admin, - sql(s"INSERT INTO $catalogV2.$namespace1.$table1" + - " (id , name , city ) VALUES (1, 'liangbowen','Guangzhou')")) - doAs( - admin, - sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$outputTable1" + - " (id int, name string, city string) USING iceberg")) - } + super.beforeAll() + + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $catalogV2.$namespace1")) + doAs( + admin, + sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$table1" + + " (id int, name string, city string) USING iceberg")) + + doAs( + admin, + sql(s"INSERT INTO $catalogV2.$namespace1.$table1" + + " (id , name , city ) VALUES (1, 'liangbowen','Guangzhou')")) + doAs( + admin, + sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$outputTable1" + + " (id int, name string, city string) USING iceberg")) } override def afterAll(): Unit = { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index 0c307195cee..a4148d9a542 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -567,11 +567,7 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { someone, { sql(s"select * from $db1.$permView").collect() })) - if (isSparkV31OrGreater) { - assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$permView/id]")) - } else { - assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table/id]")) - } + assert(e1.getMessage.contains(s"does 
not have [select] privilege on [$db1/$permView/id]")) } } @@ -590,22 +586,12 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // query all columns of the permanent view // with access privileges to the permanent view but no privilege to the source table val sql1 = s"SELECT * FROM $db1.$permView" - if (isSparkV31OrGreater) { - doAs(userPermViewOnly, { sql(sql1).collect() }) - } else { - val e1 = intercept[AccessControlException](doAs(userPermViewOnly, { sql(sql1).collect() })) - assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table/id]")) - } + doAs(userPermViewOnly, { sql(sql1).collect() }) // query the second column of permanent view with multiple columns // with access privileges to the permanent view but no privilege to the source table val sql2 = s"SELECT name FROM $db1.$permView" - if (isSparkV31OrGreater) { - doAs(userPermViewOnly, { sql(sql2).collect() }) - } else { - val e2 = intercept[AccessControlException](doAs(userPermViewOnly, { sql(sql2).collect() })) - assert(e2.getMessage.contains(s"does not have [select] privilege on [$db1/$table/name]")) - } + doAs(userPermViewOnly, { sql(sql2).collect() }) } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala index 5c27a470f74..253880bbf2e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala @@ -44,27 +44,25 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu val jdbcUrl: String = s"$dbUrl;create=true" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") - spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) - spark.conf.set( - s"spark.sql.catalog.$catalogV2.driver", - "org.apache.derby.jdbc.AutoloadedDriver") - - super.beforeAll() - - doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $catalogV2.$namespace1")) - doAs( - admin, - sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$table1" + - " (id int, name string, city string)")) - doAs( - admin, - sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$outputTable1" + - " (id int, name string, city string)")) - } + spark.conf.set( + s"spark.sql.catalog.$catalogV2", + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) + spark.conf.set( + s"spark.sql.catalog.$catalogV2.driver", + "org.apache.derby.jdbc.AutoloadedDriver") + + super.beforeAll() + + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $catalogV2.$namespace1")) + doAs( + admin, + sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$table1" + + " (id int, name string, city string)")) + doAs( + admin, + sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$outputTable1" + + " (id int, name string, city string)")) } override def afterAll(): Unit = { @@ -79,8 +77,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] CREATE DATABASE") { - 
assume(isSparkV31OrGreater) - // create database val e1 = intercept[AccessControlException]( doAs(someone, sql(s"CREATE DATABASE IF NOT EXISTS $catalogV2.$namespace2").explain())) @@ -89,8 +85,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] DROP DATABASE") { - assume(isSparkV31OrGreater) - // create database val e1 = intercept[AccessControlException]( doAs(someone, sql(s"DROP DATABASE IF EXISTS $catalogV2.$namespace2").explain())) @@ -99,8 +93,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] SELECT TABLE") { - assume(isSparkV31OrGreater) - // select val e1 = intercept[AccessControlException]( doAs(someone, sql(s"select city, id from $catalogV2.$namespace1.$table1").explain())) @@ -109,7 +101,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #4255] DESCRIBE TABLE") { - assume(isSparkV31OrGreater) val e1 = intercept[AccessControlException]( doAs(someone, sql(s"DESCRIBE TABLE $catalogV2.$namespace1.$table1").explain())) assert(e1.getMessage.contains(s"does not have [select] privilege" + @@ -117,8 +108,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] CREATE TABLE") { - assume(isSparkV31OrGreater) - // CreateTable val e2 = intercept[AccessControlException]( doAs(someone, sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$table2"))) @@ -136,8 +125,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] DROP TABLE") { - assume(isSparkV31OrGreater) - // DropTable val e3 = intercept[AccessControlException]( doAs(someone, sql(s"DROP TABLE $catalogV2.$namespace1.$table1"))) @@ -146,8 +133,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] INSERT TABLE") { - assume(isSparkV31OrGreater) - // AppendData: Insert Using a VALUES Clause val e4 = intercept[AccessControlException]( doAs( @@ -186,8 +171,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] MERGE INTO") { - assume(isSparkV31OrGreater) - val mergeIntoSql = s""" |MERGE INTO $catalogV2.$namespace1.$outputTable1 AS target @@ -218,8 +201,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] UPDATE TABLE") { - assume(isSparkV31OrGreater) - // UpdateTable val e5 = intercept[AccessControlException]( doAs( @@ -231,8 +212,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] DELETE FROM TABLE") { - assume(isSparkV31OrGreater) - // DeleteFromTable val e6 = intercept[AccessControlException]( doAs(someone, sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=1"))) @@ -241,8 +220,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] CACHE TABLE") { - assume(isSparkV31OrGreater) - // CacheTable val e7 = intercept[AccessControlException]( doAs( @@ -281,8 +258,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] ALTER TABLE") { - assume(isSparkV31OrGreater) - // AddColumns val e61 = intercept[AccessControlException]( doAs( @@ -318,8 +293,6 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu } test("[KYUUBI #3424] COMMENT ON") { - assume(isSparkV31OrGreater) - // CommentOnNamespace val e1 = 
intercept[AccessControlException]( doAs( diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala index 905cd428cab..405e53fc2ac 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForIcebergSuite.scala @@ -21,25 +21,18 @@ import org.apache.spark.SparkConf import org.scalatest.Outcome import org.apache.kyuubi.Utils -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ class DataMaskingForIcebergSuite extends DataMaskingTestBase { override protected val extraSparkConf: SparkConf = { - val conf = new SparkConf() - - if (isSparkV31OrGreater) { - conf - .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.iceberg.spark.SparkCatalog") - .set(s"spark.sql.catalog.testcat.type", "hadoop") - .set( - "spark.sql.catalog.testcat.warehouse", - Utils.createTempDir("iceberg-hadoop").toString) - } - conf - + new SparkConf() + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.iceberg.spark.SparkCatalog") + .set(s"spark.sql.catalog.testcat.type", "hadoop") + .set( + "spark.sql.catalog.testcat.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) } override protected val catalogImpl: String = "in-memory" @@ -47,19 +40,14 @@ class DataMaskingForIcebergSuite extends DataMaskingTestBase { override protected def format: String = "USING iceberg" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - super.beforeAll() - } + super.beforeAll() } override def afterAll(): Unit = { - if (isSparkV31OrGreater) { - super.afterAll() - } + super.afterAll() } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala index f74092d0b45..249d903525c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala @@ -23,23 +23,17 @@ import scala.util.Try import org.apache.spark.SparkConf import org.scalatest.Outcome -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ - class DataMaskingForJDBCV2Suite extends DataMaskingTestBase { override protected val extraSparkConf: SparkConf = { - val conf = new SparkConf() - if (isSparkV31OrGreater) { - conf - .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") - .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") - .set( - s"spark.sql.catalog.testcat.driver", - "org.apache.derby.jdbc.AutoloadedDriver") - } - conf + new SparkConf() + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + 
"org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") + .set( + s"spark.sql.catalog.testcat.driver", + "org.apache.derby.jdbc.AutoloadedDriver") } override protected val catalogImpl: String = "in-memory" @@ -47,21 +41,18 @@ class DataMaskingForJDBCV2Suite extends DataMaskingTestBase { override protected def format: String = "" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) super.beforeAll() + super.beforeAll() } override def afterAll(): Unit = { - if (isSparkV31OrGreater) { - super.afterAll() - // cleanup db - Try { - DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") - } + super.afterAll() + // cleanup db + Try { + DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") } } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala index af87a39a0af..d8877b7f9c8 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingTestBase.scala @@ -30,7 +30,6 @@ import org.scalatest.funsuite.AnyFunSuite import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ import org.apache.kyuubi.plugin.spark.authz.SparkSessionProvider import org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ /** * Base trait for data masking tests, derivative classes shall name themselves following: @@ -279,7 +278,6 @@ trait DataMaskingTestBase extends AnyFunSuite with SparkSessionProvider with Bef } test("KYUUBI #3581: permanent view should lookup rule on itself not the raw table") { - assume(isSparkV31OrGreater) val supported = doAs( permViewUser, Try(sql("CREATE OR REPLACE VIEW default.perm_view AS SELECT * FROM default.src")).isSuccess) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala index a93a69662e5..57a9e29b665 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForIcebergSuite.scala @@ -21,25 +21,18 @@ import org.apache.spark.SparkConf import org.scalatest.Outcome import org.apache.kyuubi.Utils -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ class RowFilteringForIcebergSuite extends RowFilteringTestBase { override protected val extraSparkConf: SparkConf = { - val conf = new SparkConf() - - if (isSparkV31OrGreater) { - conf - .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.iceberg.spark.SparkCatalog") - .set(s"spark.sql.catalog.testcat.type", "hadoop") - .set( - "spark.sql.catalog.testcat.warehouse", - 
Utils.createTempDir("iceberg-hadoop").toString) - } - conf - + new SparkConf() + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.iceberg.spark.SparkCatalog") + .set(s"spark.sql.catalog.testcat.type", "hadoop") + .set( + "spark.sql.catalog.testcat.warehouse", + Utils.createTempDir("iceberg-hadoop").toString) } override protected val catalogImpl: String = "in-memory" @@ -47,19 +40,14 @@ class RowFilteringForIcebergSuite extends RowFilteringTestBase { override protected def format: String = "USING iceberg" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) { - super.beforeAll() - } + super.beforeAll() } override def afterAll(): Unit = { - if (isSparkV31OrGreater) { - super.afterAll() - } + super.afterAll() } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala index 09ae6a008b5..7d20d051581 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala @@ -24,23 +24,17 @@ import scala.util.Try import org.apache.spark.SparkConf import org.scalatest.Outcome -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ - class RowFilteringForJDBCV2Suite extends RowFilteringTestBase { override protected val extraSparkConf: SparkConf = { - val conf = new SparkConf() - if (isSparkV31OrGreater) { - conf - .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") - .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") - .set( - s"spark.sql.catalog.testcat.driver", - "org.apache.derby.jdbc.AutoloadedDriver") - } - conf + new SparkConf() + .set("spark.sql.defaultCatalog", "testcat") + .set( + "spark.sql.catalog.testcat", + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") + .set( + s"spark.sql.catalog.testcat.driver", + "org.apache.derby.jdbc.AutoloadedDriver") } override protected val catalogImpl: String = "in-memory" @@ -48,21 +42,18 @@ class RowFilteringForJDBCV2Suite extends RowFilteringTestBase { override protected def format: String = "" override def beforeAll(): Unit = { - if (isSparkV31OrGreater) super.beforeAll() + super.beforeAll() } override def afterAll(): Unit = { - if (isSparkV31OrGreater) { - super.afterAll() - // cleanup db - Try { - DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") - } + super.afterAll() + // cleanup db + Try { + DriverManager.getConnection(s"jdbc:derby:memory:testcat;shutdown=true") } } override def withFixture(test: NoArgTest): Outcome = { - assume(isSparkV31OrGreater) test() } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala index 8d9561a897e..3d0890d1967 100644 
--- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringTestBase.scala @@ -27,7 +27,6 @@ import org.scalatest.funsuite.AnyFunSuite import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ import org.apache.kyuubi.plugin.spark.authz.SparkSessionProvider import org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ /** * Base trait for row filtering tests, derivative classes shall name themselves following: @@ -98,7 +97,6 @@ trait RowFilteringTestBase extends AnyFunSuite with SparkSessionProvider with Be } test("[KYUUBI #3581]: row level filter on permanent view") { - assume(isSparkV31OrGreater) val supported = doAs( permViewUser, Try(sql("CREATE OR REPLACE VIEW default.perm_view AS SELECT * FROM default.src")).isSuccess) From e51095edaa783a8d28cec71c51f7367623d6062e Mon Sep 17 00:00:00 2001 From: ITzhangqiang Date: Wed, 11 Oct 2023 21:41:22 +0800 Subject: [PATCH 02/33] [KYUUBI #5365] Don't use Log4j2's extended throwable conversion pattern in default logging configurations ### _Why are the changes needed?_ The Apache Spark Community found a performance regression with Log4j2. See https://github.com/apache/spark/pull/36747. This PR fixes the performance issue on our side. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5400 from ITzhangqiang/KYUUBI_5365.
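A minimal sketch (not part of this patch) of the layout change it applies across the default logging configurations: when a conversion pattern contains no throwable converter, Log4j2 implicitly appends its extended converter (`%xEx`), which inspects class and jar metadata for every stack frame and is the source of the regression; ending the pattern with a plain `%ex` keeps ordinary stack traces, as in the `Log4j2DivertAppender` hunk further down.

```scala
import org.apache.logging.log4j.core.layout.PatternLayout

// Sketch only: the same fallback pattern used by Log4j2DivertAppender, with "%ex"
// written explicitly so Log4j2 does not substitute the costlier "%xEx"
// (extended throwable) converter when rendering exceptions.
val layout: PatternLayout = PatternLayout.newBuilder()
  .withPattern("%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n%ex")
  .build()
```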
Closes #5365 dbb9d8b32 [ITzhangqiang] [KYUUBI #5365] Don't use Log4j2's extended throwable conversion pattern in default logging configurations Authored-by: ITzhangqiang Signed-off-by: Cheng Pan --- conf/log4j2.xml.template | 6 +++--- docker/playground/conf/kyuubi-log4j2.xml | 2 +- docs/monitor/logging.md | 2 +- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/scala/resources/log4j2-test.xml | 4 ++-- .../kyuubi-spark-authz/src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-spark-lineage/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-chat-engine/src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-jdbc-engine/src/test/resources/log4j2-test.xml | 4 ++-- .../src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-trino-engine/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-flink-it/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-hive-it/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-jdbc-it/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-kubernetes-it/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-trino-it/src/test/resources/log4j2-test.xml | 4 ++-- .../kyuubi-zookeeper-it/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-common/src/main/resources/log4j2-defaults.xml | 2 +- .../apache/kyuubi/operation/log/Log4j2DivertAppender.scala | 2 +- kyuubi-common/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-ctl/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-events/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-ha/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-hive-jdbc/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-metrics/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-rest-client/src/test/resources/log4j2-test.xml | 4 ++-- kyuubi-server/src/test/resources/log4j2-test.xml | 6 +++--- kyuubi-zookeeper/src/test/resources/log4j2-test.xml | 4 ++-- 37 files changed, 72 insertions(+), 72 deletions(-) diff --git a/conf/log4j2.xml.template b/conf/log4j2.xml.template index 86f9459a11e..4139b7dbef2 100644 --- a/conf/log4j2.xml.template +++ b/conf/log4j2.xml.template @@ -30,14 +30,14 @@ - + - + @@ -45,7 +45,7 @@ - + diff --git a/docker/playground/conf/kyuubi-log4j2.xml b/docker/playground/conf/kyuubi-log4j2.xml index 6aedf7652ff..313c121bcff 100644 --- a/docker/playground/conf/kyuubi-log4j2.xml +++ b/docker/playground/conf/kyuubi-log4j2.xml @@ -22,7 +22,7 @@ - + diff --git a/docs/monitor/logging.md b/docs/monitor/logging.md index 24a5a88d699..9dce6e22a7e 100644 --- a/docs/monitor/logging.md +++ b/docs/monitor/logging.md @@ -114,7 +114,7 @@ For example, we can disable the console appender and enable the file appender li - + diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-extension-spark-3-3/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-extension-spark-3-3/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git 
a/extensions/spark/kyuubi-extension-spark-3-4/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-extension-spark-3-4/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-extension-spark-3-4/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-extension-spark-3-5/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-extension-spark-common/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-extension-spark-common/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-extension-spark-common/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-extension-spark-jdbc-dialect/src/test/scala/resources/log4j2-test.xml b/extensions/spark/kyuubi-extension-spark-jdbc-dialect/src/test/scala/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-extension-spark-jdbc-dialect/src/test/scala/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-extension-spark-jdbc-dialect/src/test/scala/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-authz/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-authz/src/test/resources/log4j2-test.xml index 5e01ed4ab5d..7aaf820ad11 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-authz/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-connector-common/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-connector-common/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-spark-connector-common/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-connector-common/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-connector-hive/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-connector-tpch/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- 
a/extensions/spark/kyuubi-spark-connector-tpch/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/extensions/spark/kyuubi-spark-lineage/src/test/resources/log4j2-test.xml b/extensions/spark/kyuubi-spark-lineage/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/test/resources/log4j2-test.xml +++ b/extensions/spark/kyuubi-spark-lineage/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml index 585a12c6f99..356d645904d 100644 --- a/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-chat-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-flink-sql-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-flink-sql-engine/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-flink-sql-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-hive-sql-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-hive-sql-engine/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/externals/kyuubi-hive-sql-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-hive-sql-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-jdbc-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-jdbc-engine/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/externals/kyuubi-jdbc-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-jdbc-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-spark-sql-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-spark-sql-engine/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-spark-sql-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/externals/kyuubi-trino-engine/src/test/resources/log4j2-test.xml b/externals/kyuubi-trino-engine/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/externals/kyuubi-trino-engine/src/test/resources/log4j2-test.xml +++ b/externals/kyuubi-trino-engine/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-flink-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-flink-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-flink-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-flink-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-hive-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-hive-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-hive-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-hive-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-jdbc-it/src/test/resources/log4j2-test.xml 
b/integration-tests/kyuubi-jdbc-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-jdbc-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-jdbc-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-kubernetes-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-kubernetes-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-trino-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-trino-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-trino-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-trino-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/integration-tests/kyuubi-zookeeper-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-zookeeper-it/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/integration-tests/kyuubi-zookeeper-it/src/test/resources/log4j2-test.xml +++ b/integration-tests/kyuubi-zookeeper-it/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-common/src/main/resources/log4j2-defaults.xml b/kyuubi-common/src/main/resources/log4j2-defaults.xml index 63841959a5c..630584611a1 100644 --- a/kyuubi-common/src/main/resources/log4j2-defaults.xml +++ b/kyuubi-common/src/main/resources/log4j2-defaults.xml @@ -21,7 +21,7 @@ - + diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala index 0daaeae48a8..d8e37a0193b 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/Log4j2DivertAppender.scala @@ -93,7 +93,7 @@ object Log4j2DivertAppender { ap.getLayout.isInstanceOf[StringLayout]) .map(_.getLayout.asInstanceOf[StringLayout]) .getOrElse(PatternLayout.newBuilder().withPattern( - "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n").build()) + "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n%ex").build()) } def initialize(): Unit = { diff --git a/kyuubi-common/src/test/resources/log4j2-test.xml b/kyuubi-common/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-common/src/test/resources/log4j2-test.xml +++ b/kyuubi-common/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-ctl/src/test/resources/log4j2-test.xml b/kyuubi-ctl/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-ctl/src/test/resources/log4j2-test.xml +++ b/kyuubi-ctl/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-events/src/test/resources/log4j2-test.xml b/kyuubi-events/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-events/src/test/resources/log4j2-test.xml +++ b/kyuubi-events/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-ha/src/test/resources/log4j2-test.xml b/kyuubi-ha/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-ha/src/test/resources/log4j2-test.xml +++ 
b/kyuubi-ha/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-hive-jdbc/src/test/resources/log4j2-test.xml b/kyuubi-hive-jdbc/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-hive-jdbc/src/test/resources/log4j2-test.xml +++ b/kyuubi-hive-jdbc/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-metrics/src/test/resources/log4j2-test.xml b/kyuubi-metrics/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-metrics/src/test/resources/log4j2-test.xml +++ b/kyuubi-metrics/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + diff --git a/kyuubi-rest-client/src/test/resources/log4j2-test.xml b/kyuubi-rest-client/src/test/resources/log4j2-test.xml index 13ea5322a93..2f13b5777d0 100644 --- a/kyuubi-rest-client/src/test/resources/log4j2-test.xml +++ b/kyuubi-rest-client/src/test/resources/log4j2-test.xml @@ -21,13 +21,13 @@ - + - + diff --git a/kyuubi-server/src/test/resources/log4j2-test.xml b/kyuubi-server/src/test/resources/log4j2-test.xml index 25e37e8594d..bccbf1b0d84 100644 --- a/kyuubi-server/src/test/resources/log4j2-test.xml +++ b/kyuubi-server/src/test/resources/log4j2-test.xml @@ -24,20 +24,20 @@ - + - + - + diff --git a/kyuubi-zookeeper/src/test/resources/log4j2-test.xml b/kyuubi-zookeeper/src/test/resources/log4j2-test.xml index bfc40dd6df4..3110216c17c 100644 --- a/kyuubi-zookeeper/src/test/resources/log4j2-test.xml +++ b/kyuubi-zookeeper/src/test/resources/log4j2-test.xml @@ -21,14 +21,14 @@ - + - + From 98b74d2ad0cf5ff4b84424bb45c8092ced381aec Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Thu, 12 Oct 2023 08:44:02 +0800 Subject: [PATCH 03/33] [KYUUBI #5399] [AUTHZ] Cleanup Spark 3.0 specific implementation ### _Why are the changes needed?_ The cleanup follow-up for #5362, which removed the Spark 3.0 tests for Authz plugin. Remove the `isSpark31OrGreater` and Spark 3.0 specific implementations in Authz plugin. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5399 from bowenliang123/authz-31greater. 
Closes #5399 db4369b13 [Bowen Liang] import cc38b1c1e [Bowen Liang] fix MASK_SHOW_FIRST_4 cf3ef4e1c [Bowen Liang] remove isSparkV31OrGreater Authored-by: Bowen Liang Signed-off-by: Bowen Liang --- .../plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala | 6 +----- .../apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala index 9abb9cd2805..d3059ef2dd3 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala @@ -26,7 +26,6 @@ import org.apache.ranger.plugin.service.RangerBasePlugin import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.AccessControlException -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.plugin.spark.authz.util.RangerConfigProvider object SparkRangerAdminPlugin extends RangerBasePlugin("spark", "sparkSql") @@ -109,11 +108,8 @@ object SparkRangerAdminPlugin extends RangerBasePlugin("spark", "sparkSql") } else if (result.getMaskTypeDef != null) { result.getMaskTypeDef.getName match { case "MASK" => regexp_replace(col) - case "MASK_SHOW_FIRST_4" if isSparkV31OrGreater => - regexp_replace(col, hasLen = true) case "MASK_SHOW_FIRST_4" => - val right = regexp_replace(s"substr($col, 5)") - s"concat(substr($col, 0, 4), $right)" + regexp_replace(col, hasLen = true) case "MASK_SHOW_LAST_4" => val left = regexp_replace(s"left($col, length($col) - 4)") s"concat($left, right($col, 4))" diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala index 4f7cbb9ef14..e95ff91ed57 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala @@ -61,7 +61,7 @@ private[authz] object AuthZUtils { def hasResolvedPermanentView(plan: LogicalPlan): Boolean = { plan match { - case view: View if view.resolved && isSparkV31OrGreater => + case view: View if view.resolved => !getField[Boolean](view, "isTempView") case _ => false @@ -84,7 +84,6 @@ private[authz] object AuthZUtils { } lazy val SPARK_RUNTIME_VERSION: SemanticVersion = SemanticVersion(SPARK_VERSION) - lazy val isSparkV31OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.1" lazy val isSparkV32OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.2" lazy val isSparkV33OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.3" From fd69c6ee1dd40b7c4f496c74a45f574220448bc6 Mon Sep 17 00:00:00 2001 From: "chenliang.lu" Date: Thu, 12 Oct 2023 09:58:16 +0800 Subject: [PATCH 04/33] [KYUUBI #5407][AUTHZ] Tests for Iceberg system procedures of snapshot management ### _Why are the changes needed?_ To close #5407 . Follow up for https://github.com/apache/kyuubi/pull/5248 . Add some UT for snapshot management procedures. These procedures require alter permissions. 1. 
rollback_to_snapshot (https://iceberg.apache.org/docs/latest/spark-procedures/#rollback_to_snapshot): Usage: `CALL catalog_name.system.rollback_to_snapshot('db.sample', 1) ` Meaning: rollback a table to a specific snapshot ID. 2. rollback_to_timestamp (https://iceberg.apache.org/docs/latest/spark-procedures/#rollback_to_timestamp) Usage: `CALL catalog_name.system.rollback_to_timestamp('db.sample', TIMESTAMP '2021-06-30 00:00:00')` Meaning: rollback the table to the latest snapshot less than time. 3. set_current_snapshot (https://iceberg.apache.org/docs/latest/spark-procedures/#set_current_snapshot) Usage: `CALL catalog_name.system.set_current_snapshot('db.sample', 1)` Meaning: Set a table to a specific snapshot ID. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5394 from yabola/add_call. Closes #5407 98b7492e0 [Bowen Liang] split into 3 ut for snapshot management 23fe49ae4 [Bowen Liang] refactor ut 8ba97c6ef [chenliang.lu] Add UT for checking previleges in iceberg call snapshot management Lead-authored-by: chenliang.lu Co-authored-by: Bowen Liang Signed-off-by: Bowen Liang --- ...bergCatalogRangerSparkExtensionSuite.scala | 66 ++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala index e33fbb7a3df..49288055331 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala @@ -16,11 +16,15 @@ */ package org.apache.kyuubi.plugin.spark.authz.ranger -// scalastyle:off +import java.sql.Timestamp +import java.text.SimpleDateFormat + import scala.util.Try +import org.apache.spark.sql.Row import org.scalatest.Outcome +// scalastyle:off import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.AccessControlException import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ @@ -281,4 +285,64 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite }) } } + + private def prepareExampleIcebergTable(table: String, initSnapshots: Int): Unit = { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $table (id int, name string) USING iceberg")) + (0 until initSnapshots).foreach(i => + doAs(admin, sql(s"INSERT INTO $table VALUES ($i, 'user_$i')"))) + } + + private def getFirstSnapshot(table: String): Row = { + val existedSnapshots = + sql(s"SELECT * FROM $table.snapshots ORDER BY committed_at ASC LIMIT 1").collect() + existedSnapshots(0) + } + + test("CALL rollback_to_snapshot") { + val tableName = "table_rollback_to_snapshot" + val table = s"$catalogV2.$namespace1.$tableName" + withCleanTmpResources(Seq((table, "table"))) { + prepareExampleIcebergTable(table, 2) + val targetSnapshotId = getFirstSnapshot(table).getAs[Long]("snapshot_id") + val 
callRollbackToSnapshot = + s"CALL $catalogV2.system.rollback_to_snapshot (table => '$table', snapshot_id => $targetSnapshotId)" + + interceptContains[AccessControlException](doAs(someone, sql(callRollbackToSnapshot)))( + s"does not have [alter] privilege on [$namespace1/$tableName]") + doAs(admin, sql(callRollbackToSnapshot)) + } + } + + test("CALL rollback_to_timestamp") { + val tableName = "table_rollback_to_timestamp" + val table = s"$catalogV2.$namespace1.$tableName" + withCleanTmpResources(Seq((table, "table"))) { + prepareExampleIcebergTable(table, 2) + val callRollbackToTimestamp = { + val targetSnapshotCommittedAt = getFirstSnapshot(table).getAs[Timestamp]("committed_at") + val targetTimestamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") + .format(targetSnapshotCommittedAt.getTime + 1) + s"CALL $catalogV2.system.rollback_to_timestamp (table => '$table', timestamp => TIMESTAMP '$targetTimestamp')" + } + + interceptContains[AccessControlException](doAs(someone, sql(callRollbackToTimestamp)))( + s"does not have [alter] privilege on [$namespace1/$tableName]") + doAs(admin, sql(callRollbackToTimestamp)) + } + } + + test("CALL set_current_snapshot") { + val tableName = "table_set_current_snapshot" + val table = s"$catalogV2.$namespace1.$tableName" + withCleanTmpResources(Seq((table, "table"))) { + prepareExampleIcebergTable(table, 2) + val targetSnapshotId = getFirstSnapshot(table).getAs[Long]("snapshot_id") + val callSetCurrentSnapshot = + s"CALL $catalogV2.system.set_current_snapshot (table => '$table', snapshot_id => $targetSnapshotId)" + + interceptContains[AccessControlException](doAs(someone, sql(callSetCurrentSnapshot)))( + s"does not have [alter] privilege on [$namespace1/$tableName]") + doAs(admin, sql(callSetCurrentSnapshot)) + } + } } From 74e52f6924b8bd6a106925673c215383c0d50cf7 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Thu, 12 Oct 2023 15:11:18 +0800 Subject: [PATCH 05/33] [KYUUBI #5409] [DOCS] Update config docs regeneration hints in developer guide ### _Why are the changes needed?_ - Update the regeneration hints for `settings.md` in `developer.md`, as the config doc `settings.md` is checked and can be regenerated by the `dev/gen/gen_all_config_docs.sh` script. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5409 from bowenliang123/regen-settings. Closes #5409 985f39e51 [Bowen Liang] update docs Authored-by: Bowen Liang Signed-off-by: Bowen Liang --- docs/contributing/code/developer.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/contributing/code/developer.md b/docs/contributing/code/developer.md index ef6fb79889e..518d7187174 100644 --- a/docs/contributing/code/developer.md +++ b/docs/contributing/code/developer.md @@ -46,8 +46,7 @@ You can run `dev/reformat` to format all Java and Scala code. Kyuubi uses settings.md to explain available configurations. -You can run `KYUUBI_UPDATE=1 build/mvn clean test -pl kyuubi-server -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.config.AllKyuubiConfiguration` -to append descriptions of new configurations to settings.md.
+You can run `dev/gen/gen_all_config_docs.sh` to append and update descriptions of new configurations to `settings.md`. ## Generative Tooling Usage From 1b229b63a0f101d495057cdbee6f81ee8b426006 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Fri, 13 Oct 2023 14:46:53 +0800 Subject: [PATCH 06/33] [KYUUBI #5323] [AUTHZ] Drop Hive and Iceberg tables with PURGE option in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### _Why are the changes needed?_ - `DROP TABLE` for Iceberg tables only removes the table from catalog by default, which may contaminates other tests with same table - Enable PURGE option for dropping Iceberg and Hive table - Iceberg Spark DDL `DROP TABLE ... PURGE` - To drop the table from the catalog and delete the table’s contents ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ Closes #5323 from bowenliang123/iceberg-purge. Closes #5323 ce4188dd2 [Bowen Liang] purge Authored-by: Bowen Liang Signed-off-by: Bowen Liang --- .../spark/authz/SparkSessionProvider.scala | 18 +++++++++++++++++- ...dbcTableCatalogPrivilegesBuilderSuite.scala | 10 +++++++--- ...TableCatalogRangerSparkExtensionSuite.scala | 5 ++--- .../DataMaskingForJDBCV2Suite.scala | 6 +++--- .../RowFilteringForJDBCV2Suite.scala | 6 +++--- .../operation/IcebergMetadataTests.scala | 2 +- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala index e6f70b4d1a6..c7e541ef525 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala @@ -27,6 +27,7 @@ import org.scalatest.Assertions._ import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ +import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ trait SparkSessionProvider { @@ -79,7 +80,15 @@ trait SparkSessionProvider { f } finally { res.foreach { - case (t, "table") => doAs(admin, sql(s"DROP TABLE IF EXISTS $t")) + case (t, "table") => doAs( + admin, { + val purgeOption = + if (isSparkV32OrGreater && isCatalogSupportPurge( + spark.sessionState.catalogManager.currentCatalog.name())) { + "PURGE" + } else "" + sql(s"DROP TABLE IF EXISTS $t $purgeOption") + }) case (db, "database") => doAs(admin, sql(s"DROP DATABASE IF EXISTS $db")) case (fn, "function") => doAs(admin, sql(s"DROP FUNCTION IF EXISTS $fn")) case (view, "view") => doAs(admin, sql(s"DROP VIEW IF EXISTS $view")) @@ -96,4 +105,11 @@ trait SparkSessionProvider { doAs(user, assert(sql(query).collect() === result)) } + private def isCatalogSupportPurge(catalogName: String): Boolean = { + val unsupportedCatalogs = Set(v2JdbcTableCatalogClassName) + spark.conf.getOption(s"spark.sql.catalog.$catalogName") match { + case Some(catalog) if 
!unsupportedCatalogs.contains(catalog) => true + case _ => false + } + } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala index 4fe13201d87..d1a6f4ae8b0 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2JdbcTableCatalogPrivilegesBuilderSuite.scala @@ -22,6 +22,7 @@ import scala.util.Try import org.scalatest.Outcome +import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.util.AssertionUtils._ @@ -38,9 +39,7 @@ class V2JdbcTableCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite val jdbcUrl: String = s"$dbUrl;create=true" override def beforeAll(): Unit = { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2", v2JdbcTableCatalogClassName) spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) spark.conf.set( s"spark.sql.catalog.$catalogV2.driver", @@ -170,3 +169,8 @@ class V2JdbcTableCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite } } } + +object V2JdbcTableCatalogPrivilegesBuilderSuite { + val v2JdbcTableCatalogClassName: String = + "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala index 253880bbf2e..046052d558d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/V2JdbcTableCatalogRangerSparkExtensionSuite.scala @@ -24,6 +24,7 @@ import scala.util.Try import org.apache.kyuubi.plugin.spark.authz.AccessControlException import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ +import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ /** @@ -44,9 +45,7 @@ class V2JdbcTableCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSu val jdbcUrl: String = s"$dbUrl;create=true" override def beforeAll(): Unit = { - spark.conf.set( - s"spark.sql.catalog.$catalogV2", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + spark.conf.set(s"spark.sql.catalog.$catalogV2", v2JdbcTableCatalogClassName) spark.conf.set(s"spark.sql.catalog.$catalogV2.url", jdbcUrl) spark.conf.set( s"spark.sql.catalog.$catalogV2.driver", diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala index 
249d903525c..411d98cf937 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingForJDBCV2Suite.scala @@ -23,13 +23,13 @@ import scala.util.Try import org.apache.spark.SparkConf import org.scalatest.Outcome +import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ + class DataMaskingForJDBCV2Suite extends DataMaskingTestBase { override protected val extraSparkConf: SparkConf = { new SparkConf() .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set("spark.sql.catalog.testcat", v2JdbcTableCatalogClassName) .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") .set( s"spark.sql.catalog.testcat.driver", diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala index 7d20d051581..bfe1cd9e499 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfiltering/RowFilteringForJDBCV2Suite.scala @@ -24,13 +24,13 @@ import scala.util.Try import org.apache.spark.SparkConf import org.scalatest.Outcome +import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ + class RowFilteringForJDBCV2Suite extends RowFilteringTestBase { override protected val extraSparkConf: SparkConf = { new SparkConf() .set("spark.sql.defaultCatalog", "testcat") - .set( - "spark.sql.catalog.testcat", - "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog") + .set("spark.sql.catalog.testcat", v2JdbcTableCatalogClassName) .set(s"spark.sql.catalog.testcat.url", "jdbc:derby:memory:testcat;create=true") .set( s"spark.sql.catalog.testcat.driver", diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala index 99482f0c5ff..814c08343d0 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/IcebergMetadataTests.scala @@ -133,7 +133,7 @@ trait IcebergMetadataTests extends HiveJDBCTestHelper with IcebergSuiteMixin wit } assert(!rs1.next()) } finally { - statement.execute(s"DROP TABLE IF EXISTS $cg.$db.tbl") + statement.execute(s"DROP TABLE IF EXISTS $cg.$db.tbl PURGE") } } } From 4bb67bdf3bcabd135102bd85c8992e9b52ba9356 Mon Sep 17 00:00:00 2001 From: hezhao2 Date: Mon, 16 Oct 2023 11:46:56 +0800 Subject: [PATCH 07/33] [KYUUBI #5381] Change the default metrics reporter to Prometheus ### _Why are the changes needed?_ Close #5381 change default metrics reporter to prometheus since Kyuubi 1.8 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run 
test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ Closes #5344 from zhaohehuhu/Improvement-0928. Closes #5381 84f4c8208 [hezhao2] reset METRICS_REPORTERS for test case b9ee5f711 [Cheng Pan] Update kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala 86165a6fe [Cheng Pan] Update kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala a3605b626 [hezhao2] set METRICS_PROMETHEUS_PORT to 0 for test cases f1a4d2861 [hezhao2] restore version number for kyuubi.metrics.reporters in doc dae40e1a2 [hezhao2] change default metrics reporter to prometheus Lead-authored-by: hezhao2 Co-authored-by: Cheng Pan Signed-off-by: Cheng Pan --- docs/configuration/settings.md | 20 +++++++++---------- docs/deployment/migration-guide.md | 3 +++ .../apache/kyuubi/metrics/MetricsConf.scala | 2 +- .../ServerJsonLoggingEventHandlerSuite.scala | 2 ++ 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 832099764c2..2869a59cd92 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -352,16 +352,16 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Metrics -| Key | Default | Meaning | Type | Since | -|---------------------------------|----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.metrics.console.interval | PT5S | How often should report metrics to console | duration | 1.2.0 | -| kyuubi.metrics.enabled | true | Set to true to enable kyuubi metrics system | boolean | 1.2.0 | -| kyuubi.metrics.json.interval | PT5S | How often should report metrics to JSON file | duration | 1.2.0 | -| kyuubi.metrics.json.location | metrics | Where the JSON metrics file located | string | 1.2.0 | -| kyuubi.metrics.prometheus.path | /metrics | URI context path of prometheus metrics HTTP server | string | 1.2.0 | -| kyuubi.metrics.prometheus.port | 10019 | Prometheus metrics HTTP server port | int | 1.2.0 | -| kyuubi.metrics.reporters | JSON | A comma-separated list for all metrics reporters
  • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
  • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
  • JSON - JsonReporter which outputs measurements to json file periodically.
  • PROMETHEUS - PrometheusReporter which exposes metrics in Prometheus format.
  • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
| set | 1.2.0 | -| kyuubi.metrics.slf4j.interval | PT5S | How often should report metrics to SLF4J logger | duration | 1.2.0 | +| Key | Default | Meaning | Type | Since | +|---------------------------------|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metrics.console.interval | PT5S | How often should report metrics to console | duration | 1.2.0 | +| kyuubi.metrics.enabled | true | Set to true to enable kyuubi metrics system | boolean | 1.2.0 | +| kyuubi.metrics.json.interval | PT5S | How often should report metrics to JSON file | duration | 1.2.0 | +| kyuubi.metrics.json.location | metrics | Where the JSON metrics file located | string | 1.2.0 | +| kyuubi.metrics.prometheus.path | /metrics | URI context path of prometheus metrics HTTP server | string | 1.2.0 | +| kyuubi.metrics.prometheus.port | 10019 | Prometheus metrics HTTP server port | int | 1.2.0 | +| kyuubi.metrics.reporters | PROMETHEUS | A comma-separated list for all metrics reporters
  • CONSOLE - ConsoleReporter which outputs measurements to CONSOLE periodically.
  • JMX - JmxReporter which listens for new metrics and exposes them as MBeans.
  • JSON - JsonReporter which outputs measurements to json file periodically.
  • PROMETHEUS - PrometheusReporter which exposes metrics in Prometheus format.
  • SLF4J - Slf4jReporter which outputs measurements to system log periodically.
| set | 1.2.0 | +| kyuubi.metrics.slf4j.interval | PT5S | How often should report metrics to SLF4J logger | duration | 1.2.0 | ### Operation diff --git a/docs/deployment/migration-guide.md b/docs/deployment/migration-guide.md index 27dad2aba92..58df0fcc629 100644 --- a/docs/deployment/migration-guide.md +++ b/docs/deployment/migration-guide.md @@ -24,6 +24,9 @@ To restore previous behavior, set `kyuubi.metadata.store.jdbc.database.type=DERBY` and `kyuubi.metadata.store.jdbc.url=jdbc:derby:memory:kyuubi_state_store_db;create=true`. +* Since Kyuubi 1.8, PROMETHEUS is changed as the default metrics reporter. To restore previous behavior, + set `kyuubi.metrics.reporters=JSON`. + ## Upgrading from Kyuubi 1.7.1 to 1.7.2 * Since Kyuubi 1.7.2, for Kyuubi BeeLine, please use `--python-mode` option to run python code or script. diff --git a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala index fe11f6eb18b..9bc2e63243a 100644 --- a/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala +++ b/kyuubi-metrics/src/main/scala/org/apache/kyuubi/metrics/MetricsConf.scala @@ -46,7 +46,7 @@ object MetricsConf { .transformToUpperCase .toSet() .checkValues(ReporterType) - .createWithDefault(Set(JSON.toString)) + .createWithDefault(Set(PROMETHEUS.toString)) val METRICS_CONSOLE_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.metrics.console.interval") .doc("How often should report metrics to console") diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala index 2f794ed4819..1dc24aeec94 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala @@ -34,6 +34,7 @@ import org.apache.kyuubi._ import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.events.ServerEventHandlerRegister +import org.apache.kyuubi.metrics.MetricsConf import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.OperationState._ import org.apache.kyuubi.server.KyuubiServer @@ -56,6 +57,7 @@ class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCT .set(KyuubiConf.SERVER_EVENT_JSON_LOG_PATH, serverLogRoot) .set(KyuubiConf.ENGINE_SPARK_EVENT_LOGGERS, Seq("JSON")) .set(KyuubiConf.ENGINE_EVENT_JSON_LOG_PATH, engineLogRoot) + .set(MetricsConf.METRICS_REPORTERS, Set.empty[String]) } override protected def jdbcUrl: String = getJdbcUrl From c6113c3dc593b7537f7767f0359cee680c63814c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Oct 2023 07:56:24 +0000 Subject: [PATCH 08/33] Bump postcss from 8.4.16 to 8.4.31 in /kyuubi-server/web-ui (#5351) --- kyuubi-server/web-ui/package-lock.json | 26 +++++++++++++------------- kyuubi-server/web-ui/pnpm-lock.yaml | 23 ++++------------------- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/kyuubi-server/web-ui/package-lock.json b/kyuubi-server/web-ui/package-lock.json index 352560cd703..fa01c240573 100644 --- a/kyuubi-server/web-ui/package-lock.json +++ b/kyuubi-server/web-ui/package-lock.json @@ -14,7 +14,7 @@ "element-plus": "^2.2.12", "pinia": "^2.0.18", 
"pinia-plugin-persistedstate": "^2.1.1", - "swagger-ui-dist": "^5.6.2", + "swagger-ui-dist": "^4.9.1", "vue": "^3.2.37", "vue-i18n": "^9.2.2", "vue-router": "^4.1.3" @@ -3444,9 +3444,9 @@ } }, "node_modules/postcss": { - "version": "8.4.24", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.24.tgz", - "integrity": "sha512-M0RzbcI0sO/XJNucsGjvWU9ERWxb/ytp1w6dKtxTKgixdtQDq4rmx/g8W1hnaheq9jgwL/oyEdH5Bc4WwJKMqg==", + "version": "8.4.31", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", + "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", "funding": [ { "type": "opencollective", @@ -3874,9 +3874,9 @@ } }, "node_modules/swagger-ui-dist": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.6.2.tgz", - "integrity": "sha512-2LKVuU2m6RHkemJloKiKJOTpN2RPmbsiad0OfSdtmFHOXJKAgYRZMwJcpT96RX6E9HUB5RkVOFC6vWqVjRgSOg==" + "version": "4.19.1", + "resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-4.19.1.tgz", + "integrity": "sha512-n/gFn+R7G/BXWwl5UZLw6F1YgWOlf3zkwGlsPhTMhNtAAolBGKg0JS5b2RKt5NI6/hSopVaSrki2wTIMUDDy2w==" }, "node_modules/symbol-tree": { "version": "3.2.4", @@ -6940,9 +6940,9 @@ } }, "postcss": { - "version": "8.4.24", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.24.tgz", - "integrity": "sha512-M0RzbcI0sO/XJNucsGjvWU9ERWxb/ytp1w6dKtxTKgixdtQDq4rmx/g8W1hnaheq9jgwL/oyEdH5Bc4WwJKMqg==", + "version": "8.4.31", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", + "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", "requires": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", @@ -7223,9 +7223,9 @@ } }, "swagger-ui-dist": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.6.2.tgz", - "integrity": "sha512-2LKVuU2m6RHkemJloKiKJOTpN2RPmbsiad0OfSdtmFHOXJKAgYRZMwJcpT96RX6E9HUB5RkVOFC6vWqVjRgSOg==" + "version": "4.19.1", + "resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-4.19.1.tgz", + "integrity": "sha512-n/gFn+R7G/BXWwl5UZLw6F1YgWOlf3zkwGlsPhTMhNtAAolBGKg0JS5b2RKt5NI6/hSopVaSrki2wTIMUDDy2w==" }, "symbol-tree": { "version": "3.2.4", diff --git a/kyuubi-server/web-ui/pnpm-lock.yaml b/kyuubi-server/web-ui/pnpm-lock.yaml index 83754291beb..ffed6c6bd4c 100644 --- a/kyuubi-server/web-ui/pnpm-lock.yaml +++ b/kyuubi-server/web-ui/pnpm-lock.yaml @@ -797,7 +797,7 @@ packages: '@vue/shared': 3.2.37 estree-walker: 2.0.2 magic-string: 0.25.9 - postcss: 8.4.16 + postcss: 8.4.31 source-map: 0.6.1 /@vue/compiler-ssr@3.2.37: @@ -2059,16 +2059,10 @@ packages: resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==} dev: true - /nanoid@3.3.4: - resolution: {integrity: sha512-MqBkQh/OHTS2egovRtLk45wEyNXwF+cokD+1YPf9u5VfJiRdAiRwB2froX5Co9Rh20xs4siNPm8naNotSD6RBw==} - engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} - hasBin: true - /nanoid@3.3.6: resolution: {integrity: sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==} engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} hasBin: true - dev: true /natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} @@ -2242,22 +2236,13 @@ packages: util-deprecate: 1.0.2 dev: true - /postcss@8.4.16: - resolution: {integrity: 
sha512-ipHE1XBvKzm5xI7hiHCZJCSugxvsdq2mPnsq5+UF+VHCjiBvtDrlxJfMBToWaP9D5XlgNmcFGqoHmUn0EYEaRQ==} - engines: {node: ^10 || ^12 || >=14} - dependencies: - nanoid: 3.3.4 - picocolors: 1.0.0 - source-map-js: 1.0.2 - - /postcss@8.4.24: - resolution: {integrity: sha512-M0RzbcI0sO/XJNucsGjvWU9ERWxb/ytp1w6dKtxTKgixdtQDq4rmx/g8W1hnaheq9jgwL/oyEdH5Bc4WwJKMqg==} + /postcss@8.4.31: + resolution: {integrity: sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==} engines: {node: ^10 || ^12 || >=14} dependencies: nanoid: 3.3.6 picocolors: 1.0.0 source-map-js: 1.0.2 - dev: true /prelude-ls@1.1.2: resolution: {integrity: sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==} @@ -2674,7 +2659,7 @@ packages: dependencies: '@types/node': 18.7.6 esbuild: 0.17.19 - postcss: 8.4.24 + postcss: 8.4.31 resolve: 1.22.1 rollup: 3.24.0 sass: 1.54.4 From 143b26b6e8b825cc7f75d339e55df2857f94f766 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 16 Oct 2023 18:54:57 +0800 Subject: [PATCH 09/33] [KYUUBI #5284] Support Hudi Alter Table Command in Authz ### _Why are the changes needed?_ To close #5284. Hudi also is a common used data format, since kyuubi already support iceberg and delta, we should also support hudi. In this pr we support hoodie sql about ALTER COMMAND in authz In this PR we use default Hudi version 0.14.0. We support from spark 3.1 to spark 3.4, since Hudi don't support spark 3.5 yet - [x] spark 3.1 - [x] spark 3.2 - [x] spark 3.3 - [x] spark 3.4 - [ ] spark 3.5 Also since Hudi only supports Scala 2.12, I also made Hudi as a separate profile to avoid importing Hudi when enable Scala 2.13 ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5287 from AngersZhuuuu/KYUUBI-5284. 
Closes #5284 f171e11af [Angerszhuuuu] Update pom.xml 3f57a3dc5 [Angerszhuuuu] follow comment f6c764028 [Angerszhuuuu] follow comment 51797e25c [Angerszhuuuu] trigger b3c059af9 [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala 3510e7601 [liangbowen] remove scope in dependencyManagement 14ea0d498 [liangbowen] change to use `spark.binary.version` for hudi dependency by default 354260eb0 [liangbowen] remove the abbreviation tite 658bddbab [liangbowen] remove clarification and use ALTERTABLE_PROPERTIES for opType 150edcd40 [Angerszhuuuu] update 30c417b19 [Angerszhuuuu] trigger 56e5cb17b [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala fe9b75270 [Angerszhuuuu] update 888943831 [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala db749a277 [Angerszhuuuu] update 9b09e78c2 [Angerszhuuuu] Update HudiCommands.scala 87de62e52 [Angerszhuuuu] follow comment 2d551d112 [Angerszhuuuu] Update master.yml 89082e06b [Angerszhuuuu] Update master.yml 7c7846378 [Angerszhuuuu] Merge branch 'KYUUBI-5284' of https://github.com/AngersZhuuuu/incubator-kyuubi into KYUUBI-5284 d32ca9839 [Angerszhuuuu] Update master.yml ec43e2a7b [Angerszhuuuu] Merge branch 'master' into KYUUBI-5284 b3611fd3e [Angerszhuuuu] update 2a0dfa74f [Angerszhuuuu] Update AuthZUtils.scala 45ee9e251 [Angerszhuuuu] update 0560a5e14 [Angerszhuuuu] Update pom.xml 97c50f622 [Angerszhuuuu] update f57ee0093 [Angerszhuuuu] Update table_command_spec.json fb72197e6 [Angerszhuuuu] update 2154cf928 [Angerszhuuuu] trigger 44469359f [Angerszhuuuu] trigger b0e768cb8 [Angerszhuuuu] Update HoodieCatalogRangerSparkExtensionSuite.scala 83795ed63 [Angerszhuuuu] Update pom.xml eed190f92 [Angerszhuuuu] update 361660145 [Angerszhuuuu] update 1ed1f3ab6 [Angerszhuuuu] Update 7ee3c7dd5 [Angerszhuuuu] Merge branch 'KYUUBI-5284' of https://github.com/AngersZhuuuu/incubator-kyuubi into KYUUBI-5284 ee0916f63 [Angerszhuuuu] Update HoodieCatalogRangerSparkExtensionSuite.scala 010260fa4 [Angerszhuuuu] Merge branch 'master' into KYUUBI-5284 c11d02def [Angerszhuuuu] update b84f91f65 [Angerszhuuuu] update 42fbb0ffa [Angerszhuuuu] Update HoodieCatalogRangerSparkExtensionSuite.scala c1346adb1 [Angerszhuuuu] update 2ec63ae94 [Angerszhuuuu] Update pom.xml 39bce7468 [Angerszhuuuu] update c70b0ea2f [Angerszhuuuu] Update pom.xml e1d85ff77 [Angerszhuuuu] Update pom.xml 59012ac25 [Angerszhuuuu] Update pom.xml a46de65b5 [Angerszhuuuu] Update HoodieTest.java b8173b893 [Angerszhuuuu] update 055713329 [Angerszhuuuu] Update table_command_spec.json d7b21e820 [Angerszhuuuu] Update HoodieCatalogRangerSparkExtensionSuite.scala 0a93ff794 [Angerszhuuuu] [KYUUBI #5284] Kyuubi authz support Hoodie Alter Table Command Lead-authored-by: Angerszhuuuu Co-authored-by: liangbowen Signed-off-by: Kent Yao --- .github/workflows/master.yml | 8 +- extensions/spark/kyuubi-spark-authz/pom.xml | 17 +++ .../main/resources/table_command_spec.json | 97 +++++++++++++ .../plugin/spark/authz/util/AuthZUtils.scala | 6 + .../spark/authz/RangerTestResources.scala | 1 + .../plugin/spark/authz/gen/HudiCommands.scala | 82 +++++++++++ .../authz/gen/JsonSpecFileGenerator.scala | 2 +- ...HudiCatalogRangerSparkExtensionSuite.scala | 132 ++++++++++++++++++ .../java/org/apache/kyuubi/tags/HudiTest.java | 29 ++++ pom.xml | 16 ++- 10 files changed, 382 insertions(+), 8 deletions(-) create mode 100644 extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala create mode 100644 
extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala create mode 100644 kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/HudiTest.java diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c3cc537366e..f590ea2671c 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -60,17 +60,17 @@ jobs: - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.1.3 -Dspark.archive.name=spark-3.1.3-bin-hadoop3.2.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.1-binary' - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.2-binary' - java: 8 spark: '3.4' spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.3.3 -Dspark.archive.name=spark-3.3.3-bin-hadoop3.tgz -Pzookeeper-3.6' - exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.SparkLocalClusterTest' + exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.SparkLocalClusterTest' comment: 'verify-on-spark-3.3-binary' - java: 8 spark: '3.4' @@ -108,7 +108,7 @@ jobs: run: | TEST_MODULES="dev/kyuubi-codecov" ./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \ - -Pspark-${{ matrix.spark }} ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }} + -Pspark-${{ matrix.spark }} -Pspark-authz-hudi-test ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }} - name: Code coverage if: | matrix.java == 8 && diff --git a/extensions/spark/kyuubi-spark-authz/pom.xml b/extensions/spark/kyuubi-spark-authz/pom.xml index 1ae63fcb34f..97145e51485 100644 --- a/extensions/spark/kyuubi-spark-authz/pom.xml +++ b/extensions/spark/kyuubi-spark-authz/pom.xml @@ -336,6 +336,23 @@ + + + spark-authz-hudi-test + + + org.apache.hudi + hudi-spark${hudi.spark.binary.version}-bundle_${scala.binary.version} + ${hudi.version} + test + + + + gen-policy diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 06d76c7e530..2febac11b3e 100644 --- 
a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -1409,4 +1409,101 @@ "fieldName" : "query", "fieldExtractor" : "LogicalPlanQueryExtractor" } ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableAddColumnsCommand", + "tableDescs" : [ { + "fieldName" : "tableId", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : { + "fieldName" : "colsToAdd", + "fieldExtractor" : "StructFieldSeqColumnExtractor" + }, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_ADDCOLS", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableChangeColumnCommand", + "tableDescs" : [ { + "fieldName" : "tableIdentifier", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : { + "fieldName" : "columnName", + "fieldExtractor" : "StringColumnExtractor" + }, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_REPLACECOLS", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableDropPartitionCommand", + "tableDescs" : [ { + "fieldName" : "tableIdentifier", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : { + "fieldName" : "partitionSpecs", + "fieldExtractor" : "PartitionSeqColumnExtractor" + }, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_DROPPARTS", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableRenameCommand", + "tableDescs" : [ { + "fieldName" : "oldName", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : { + "fieldName" : "oldName", + "fieldExtractor" : "TableIdentifierTableTypeExtractor", + "skipTypes" : [ "TEMP_VIEW" ] + }, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_RENAME", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.AlterTableCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_PROPERTIES", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "ALTERTABLE_PROPERTIES", + "queryDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala index e95ff91ed57..2477c9e45d5 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala 
+++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/AuthZUtils.scala @@ -86,6 +86,12 @@ private[authz] object AuthZUtils { lazy val SPARK_RUNTIME_VERSION: SemanticVersion = SemanticVersion(SPARK_VERSION) lazy val isSparkV32OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.2" lazy val isSparkV33OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.3" + lazy val isSparkV34OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.4" + lazy val isSparkV35OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.5" + + lazy val SCALA_RUNTIME_VERSION: SemanticVersion = + SemanticVersion(scala.util.Properties.versionNumberString) + lazy val isScalaV213: Boolean = SCALA_RUNTIME_VERSION >= "2.13" def quoteIfNeeded(part: String): String = { if (part.matches("[a-zA-Z0-9_]+") && !part.matches("\\d+")) { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala index 2297f73f9c4..0b1df64da78 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala @@ -40,6 +40,7 @@ object RangerTestNamespace { val defaultDb = "default" val sparkCatalog = "spark_catalog" val icebergNamespace = "iceberg_ns" + val hudiNamespace = "hudi_ns" val namespace1 = "ns1" val namespace2 = "ns2" } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala new file mode 100644 index 00000000000..6e3237d2a72 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kyuubi.plugin.spark.authz.gen + +import org.apache.kyuubi.plugin.spark.authz.OperationType._ +import org.apache.kyuubi.plugin.spark.authz.serde._ +import org.apache.kyuubi.plugin.spark.authz.serde.TableType._ + +object HudiCommands { + val AlterHoodieTableAddColumnsCommand = { + val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableAddColumnsCommand" + val columnDesc = ColumnDesc("colsToAdd", classOf[StructFieldSeqColumnExtractor]) + val tableDesc = TableDesc("tableId", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_ADDCOLS) + } + + val AlterHoodieTableChangeColumnCommand = { + val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableChangeColumnCommand" + val columnDesc = ColumnDesc("columnName", classOf[StringColumnExtractor]) + val tableDesc = + TableDesc("tableIdentifier", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_REPLACECOLS) + } + + val AlterHoodieTableDropPartitionCommand = { + val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableDropPartitionCommand" + val columnDesc = ColumnDesc("partitionSpecs", classOf[PartitionSeqColumnExtractor]) + val tableDesc = + TableDesc("tableIdentifier", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_DROPPARTS) + } + + val AlterHoodieTableRenameCommand = { + val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableRenameCommand" + val oldTableTableTypeDesc = + TableTypeDesc( + "oldName", + classOf[TableIdentifierTableTypeExtractor], + Seq(TEMP_VIEW)) + val oldTableD = TableDesc( + "oldName", + classOf[TableIdentifierTableExtractor], + tableTypeDesc = Some(oldTableTableTypeDesc)) + + TableCommandSpec(cmd, Seq(oldTableD), ALTERTABLE_RENAME) + } + + val AlterTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.AlterTableCommand" + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) + } + + val Spark31AlterTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand" + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) + } + + val data: Array[TableCommandSpec] = Array( + AlterHoodieTableAddColumnsCommand, + AlterHoodieTableChangeColumnCommand, + AlterHoodieTableDropPartitionCommand, + AlterHoodieTableRenameCommand, + AlterTableCommand, + Spark31AlterTableCommand) +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala index 855e25e87ea..1b2d330d1cb 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala @@ -43,7 +43,7 @@ class JsonSpecFileGenerator extends AnyFunSuite { // scalastyle:on test("check spec json files") { writeCommandSpecJson("database", DatabaseCommands.data) - writeCommandSpecJson("table", TableCommands.data ++ IcebergCommands.data) + writeCommandSpecJson("table", TableCommands.data ++ IcebergCommands.data ++ HudiCommands.data) writeCommandSpecJson("function", FunctionCommands.data) 
writeCommandSpecJson("scan", Scans.data) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala new file mode 100644 index 00000000000..8fcae6cf941 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.plugin.spark.authz.ranger + +import org.apache.spark.SparkConf +import org.scalatest.Outcome + +import org.apache.kyuubi.Utils +import org.apache.kyuubi.plugin.spark.authz.AccessControlException +import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ +import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ +import org.apache.kyuubi.tags.HudiTest +import org.apache.kyuubi.util.AssertionUtils.interceptContains + +/** + * Tests for RangerSparkExtensionSuite on Hudi SQL. + * Run this test should enbale `hudi` profile. + */ +@HudiTest +class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { + override protected val catalogImpl: String = "hive" + // TODO: Apache Hudi not support Spark 3.5 and Scala 2.13 yet, + // should change after Apache Hudi support Spark 3.5 and Scala 2.13. 
+ private def isSupportedVersion = !isSparkV35OrGreater && !isScalaV213 + + override protected val sqlExtensions: String = + if (isSupportedVersion) { + "org.apache.spark.sql.hudi.HoodieSparkSessionExtension" + } else { + "" + } + + override protected val extraSparkConf: SparkConf = + new SparkConf() + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + + val namespace1 = hudiNamespace + val table1 = "table1_hoodie" + val table2 = "table2_hoodie" + val outputTable1 = "outputTable_hoodie" + + override def withFixture(test: NoArgTest): Outcome = { + assume(isSupportedVersion) + test() + } + + override def beforeAll(): Unit = { + if (isSupportedVersion) { + if (isSparkV32OrGreater) { + spark.conf.set( + s"spark.sql.catalog.$sparkCatalog", + "org.apache.spark.sql.hudi.catalog.HoodieCatalog") + spark.conf.set(s"spark.sql.catalog.$sparkCatalog.type", "hadoop") + spark.conf.set( + s"spark.sql.catalog.$sparkCatalog.warehouse", + Utils.createTempDir("hudi-hadoop").toString) + } + super.beforeAll() + } + } + + override def afterAll(): Unit = { + if (isSupportedVersion) { + super.afterAll() + spark.sessionState.catalog.reset() + spark.sessionState.conf.clear() + } + } + + test("AlterTableCommand") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING hudi + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + + // AlterHoodieTableAddColumnsCommand + interceptContains[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 ADD COLUMNS(age int)")))( + s"does not have [alter] privilege on [$namespace1/$table1/age]") + + // AlterHoodieTableChangeColumnCommand + interceptContains[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 CHANGE COLUMN id id bigint")))( + s"does not have [alter] privilege" + + s" on [$namespace1/$table1/id]") + + // AlterHoodieTableDropPartitionCommand + interceptContains[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 DROP PARTITION (city='test')")))( + s"does not have [alter] privilege" + + s" on [$namespace1/$table1/city]") + + // AlterHoodieTableRenameCommand + interceptContains[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 RENAME TO $namespace1.$table2")))( + s"does not have [alter] privilege" + + s" on [$namespace1/$table1]") + + // AlterTableCommand && Spark31AlterTableCommand + sql("set hoodie.schema.on.read.enable=true") + interceptContains[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 ADD COLUMNS(age int)")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + } + } +} diff --git a/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/HudiTest.java b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/HudiTest.java new file mode 100644 index 00000000000..346f146faf2 --- /dev/null +++ b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/HudiTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.tags; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import org.scalatest.TagAnnotation; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface HudiTest {} diff --git a/pom.xml b/pom.xml index df1e0c3b706..5773eda53bf 100644 --- a/pom.xml +++ b/pom.xml @@ -158,6 +158,8 @@ false 4.5.14 4.4.16 + 0.14.0 + ${spark.binary.version} 1.4.0 2.15.0 4.0.4 @@ -234,7 +236,7 @@ 1.12.1 4.8.0 2.2.0 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest false 2.30.0 @@ -1475,6 +1477,12 @@ threeten-extra ${threeten.version} + + + org.apache.hudi + hudi-spark${hudi.spark.binary.version}-bundle_${scala.binary.version} + ${hudi.version} + @@ -2239,7 +2247,7 @@ 2.4.0 3.4.1 3.4 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest @@ -2250,6 +2258,8 @@ 2.4.0 + + 3.4 3.5.0 3.5 org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest @@ -2260,7 +2270,7 @@ spark-master 4.0.0-SNAPSHOT - org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.PySparkTest From 32b6dc3b743c1cf5c97508c1d320f873a694869d Mon Sep 17 00:00:00 2001 From: sychen Date: Mon, 16 Oct 2023 21:25:39 +0800 Subject: [PATCH 10/33] [KYUUBI #5426] [MINOR][KSHC] Avoid use class.newInstance directly ### _Why are the changes needed?_ Remove the deprecated usage. https://github.com/openjdk/jdk11u-dev/blob/c780db754e14ff77995dc090396d1958cea1ada1/src/java.base/share/classes/java/lang/Class.java#L534-L535 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5426 from cxzl25/newInstance. 
Closes #5426 dcb679b95 [sychen] avoid use class.newInstance directly Authored-by: sychen Signed-off-by: Cheng Pan --- .../spark/connector/hive/read/HivePartitionedReader.scala | 5 +++-- .../apache/kyuubi/spark/connector/hive/read/HiveReader.scala | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HivePartitionedReader.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HivePartitionedReader.scala index 732643eb149..5463a7bdd9f 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HivePartitionedReader.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HivePartitionedReader.scala @@ -47,7 +47,7 @@ case class HivePartitionedReader( private val hiveConf = broadcastHiveConf.value.value - private val tableDeser = tableDesc.getDeserializerClass.newInstance() + private val tableDeser = tableDesc.getDeserializerClass.getDeclaredConstructor().newInstance() tableDeser.initialize(hiveConf, tableDesc.getProperties) private val localDeser: Deserializer = bindPartitionOpt match { @@ -55,7 +55,8 @@ case class HivePartitionedReader( val tableProperties = tableDesc.getProperties val props = new Properties(tableProperties) val deserializer = - bindPartition.getDeserializer.getClass.asInstanceOf[Class[Deserializer]].newInstance() + bindPartition.getDeserializer.getClass.asInstanceOf[ + Class[Deserializer]].getDeclaredConstructor().newInstance() deserializer.initialize(hiveConf, props) deserializer case _ => tableDeser diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveReader.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveReader.scala index 54f6e80c07e..f6a7b194edd 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveReader.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveReader.scala @@ -62,7 +62,7 @@ object HiveReader { HiveShim.appendReadColumns(hiveConf, neededColumnIDs, neededColumnNames) - val deserializer = tableDesc.getDeserializerClass.newInstance + val deserializer = tableDesc.getDeserializerClass.getDeclaredConstructor().newInstance() deserializer.initialize(hiveConf, tableDesc.getProperties) // Specifies types and object inspectors of columns to be scanned. From 64ea27c1ff9da82c273078c70a15e709c3dbfb49 Mon Sep 17 00:00:00 2001 From: minyk Date: Mon, 16 Oct 2023 21:27:54 +0800 Subject: [PATCH 11/33] [KYUUBI #5425] Add thread name and ID to log4j2-defaults.xml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### _Why are the changes needed?_ Add `%tn` to `log4j2-defaults.xml` for more information.
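For illustration only, a Log4j2 `PatternLayout` that carries the thread name (`%tn`) and thread ID (`%tid`) could look like the sketch below. The appender name, logger levels, and the rest of the pattern are assumptions made up for this example, not the literal contents of Kyuubi's `log4j2-defaults.xml`:

```xml
<!-- Minimal illustrative sketch: only %tn (thread name) and %tid (thread ID)
     relate to this change; all other values are placeholder assumptions. -->
<Configuration status="INFO">
  <Appenders>
    <Console name="stdout" target="SYSTEM_OUT">
      <PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %p %tn-%tid %c: %m%n"/>
    </Console>
  </Appenders>
  <Loggers>
    <Root level="INFO">
      <AppenderRef ref="stdout"/>
    </Root>
  </Loggers>
</Configuration>
```

With a pattern like this, every log line records the emitting thread, which makes it easier to correlate concurrent session and operation logs.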
This close #5425 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [X] Add screenshots for manual tests if appropriate ![Screenshot 2023-10-16 at 6 05 00 PM](https://github.com/apache/kyuubi/assets/1802676/0d155d40-78f5-4f9f-beea-f592e409e523) - [X] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5428 from minyk/kyuubi-5425. Closes #5425 e42a4b286 [minyk] add thread name and id to log4j2.xml.template 705e86e49 [minyk] add thread name and id to log pattern Authored-by: minyk Signed-off-by: Cheng Pan --- conf/log4j2.xml.template | 2 +- kyuubi-common/src/main/resources/log4j2-defaults.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/log4j2.xml.template b/conf/log4j2.xml.template index 4139b7dbef2..2601690eb90 100644 --- a/conf/log4j2.xml.template +++ b/conf/log4j2.xml.template @@ -30,7 +30,7 @@ - + diff --git a/kyuubi-common/src/main/resources/log4j2-defaults.xml b/kyuubi-common/src/main/resources/log4j2-defaults.xml index 630584611a1..7a1a3323569 100644 --- a/kyuubi-common/src/main/resources/log4j2-defaults.xml +++ b/kyuubi-common/src/main/resources/log4j2-defaults.xml @@ -21,7 +21,7 @@ - + From 5940fd14108b605e8daadd1f25a14bc8dc793a49 Mon Sep 17 00:00:00 2001 From: Omkar Makhare Date: Mon, 16 Oct 2023 22:03:57 +0800 Subject: [PATCH 12/33] [KYUUBI #5432] Fix typo in README.md ## Description: This Pull Request fixes a typographical error in the README.md file. ## Changes Made: "e.t.c." has been changed to "etc." for the correct abbreviation. ## Additional Information: This fix is not related to any existing issue. It's a minor typo that I noticed while reviewing the README. Thank you for considering this contribution. Closes #5431 from omimakhare/patch-1. Closes #5432 b5c1aa650 [OMKAR MAKHARE] Update README.md Lead-authored-by: Omkar Makhare Co-authored-by: OMKAR MAKHARE <114830033+omimakhare@users.noreply.github.com> Signed-off-by: Cheng Pan --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b1423e7855..d87cfabd85d 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ In typical big data production environments with Kyuubi, there should be system - System administrators: A small group consists of Spark experts responsible for Kyuubi deployment, configuration, and tuning. - End-users: Focus on business data of their own, not where it stores, how it computes. -Additionally, the Kyuubi community will continuously optimize the whole system with various features, such as History-Based Optimizer, Auto-tuning, Materialized View, SQL Dialects, Functions, e.t.c. +Additionally, the Kyuubi community will continuously optimize the whole system with various features, such as History-Based Optimizer, Auto-tuning, Materialized View, SQL Dialects, Functions, etc. ### Usage scenarios From b24d94e74f3cc6648c09d0d44ead781b1b2e70b1 Mon Sep 17 00:00:00 2001 From: zwangsheng Date: Mon, 16 Oct 2023 22:43:02 +0800 Subject: [PATCH 13/33] [KYUUBI #5328] Batch supports priority scheduling ### _Why are the changes needed?_ Follow #5329 and close #5328: 1. 
Add new config `kyuubi.metadata.store.jdbc.priority.enabled` to control whether enable priority scheduling, due to users may experience performance issues when using MySQL5.7 as metastore backend and enabling kyuubi batch v2 priority feature. 2. When priority scheduling is enabled, `KyuubiBatchService` picks metadata job with `ORDER BY priority DESC, create_time ASC`. 3. Insert metadata with priority field, default priority value is `10`. 4. Add new config `kyuubi.batch.priority` for each batch priority. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5352 from zwangsheng/KYUUBI#5328. Closes #5328 687ed1ed6 [Cheng Pan] Update kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala 58621b557 [zwangsheng] fix comments 1bf81e75c [zwangsheng] fix style 7ed2551b3 [zwangsheng] update default priority desc & improve UT 21ceccb01 [zwangsheng] fix doc 27fc5e825 [zwangsheng] enrich desc c0bbc0dfd [zwangsheng] fix style 6b8d0f091 [zwangsheng] fix comment 67eb2524d [zwangsheng] fix comment e1705c34d [zwangsheng] Add config to control whether pick order by priority or not 129a46729 [zwangsheng] Add unit test for pickBatchForSubmitting fcaf85d92 [zwangsheng] Fix unit test f7ca2219e [zwangsheng] Fix unit test 8d4b276ff [wangsheng] fix code style 4c6b99090 [wangsheng] fix comments 654ad843a [zwangsheng] [KYUUBI #5328][V2] Kyuubi Server Pick Metadata job with priority Lead-authored-by: zwangsheng Co-authored-by: wangsheng <2213335496@qq.com> Co-authored-by: Cheng Pan Signed-off-by: Cheng Pan --- docs/configuration/settings.md | 1 + .../kyuubi/config/KyuubiReservedKeys.scala | 3 + .../kyuubi/server/metadata/api/Metadata.scala | 3 + .../metadata/jdbc/JDBCMetadataStore.scala | 12 ++-- .../metadata/jdbc/JDBCMetadataStoreConf.scala | 12 ++++ .../kyuubi/session/KyuubiBatchSession.scala | 4 +- .../kyuubi/session/KyuubiSessionManager.scala | 5 +- .../metadata/MetadataManagerSuite.scala | 67 +++++++++++++++++-- 8 files changed, 96 insertions(+), 11 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 2869a59cd92..d9d8d95efb1 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -347,6 +347,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
  • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
  • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
  • MYSQL: MySQL, JDBC driver `com.mysql.jdbc.Driver`.
  • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
  • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | | kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. | string | 1.6.0 | diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala index eb209caec99..592425a4b4c 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiReservedKeys.scala @@ -26,6 +26,9 @@ object KyuubiReservedKeys { final val KYUUBI_SESSION_USER_SIGN = "kyuubi.session.user.sign" final val KYUUBI_SESSION_REAL_USER_KEY = "kyuubi.session.real.user" final val KYUUBI_SESSION_CONNECTION_URL_KEY = "kyuubi.session.connection.url" + // default priority is 10, higher priority will be scheduled first + // when enabled metadata store priority feature + final val KYUUBI_BATCH_PRIORITY = "kyuubi.batch.priority" final val KYUUBI_BATCH_RESOURCE_UPLOADED_KEY = "kyuubi.batch.resource.uploaded" final val KYUUBI_STATEMENT_ID_KEY = "kyuubi.statement.id" final val KYUUBI_ENGINE_ID = "kyuubi.engine.id" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/api/Metadata.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/api/Metadata.scala index 3e3d9482841..0553cf90b54 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/api/Metadata.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/api/Metadata.scala @@ -78,6 +78,9 @@ case class Metadata( engineState: String = null, engineError: Option[String] = None, endTime: Long = 0L, + // keep consistent with table creation DDL + // find why we set 10 as default in KYUUBI #5329 + priority: Int = 10, peerInstanceClosed: Boolean = false) { def appMgrInfo: ApplicationManagerInfo = { ApplicationManagerInfo( diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala index dcb9c0f6685..419fa844750 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala @@ -61,6 +61,8 @@ 
class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { case CUSTOM => new GenericDatabaseDialect } + private val priorityEnabled = conf.get(METADATA_STORE_JDBC_PRIORITY_ENABLED) + private val datasourceProperties = JDBCMetadataStoreConf.getMetadataStoreJDBCDataSourceProperties(conf) private val hikariConfig = new HikariConfig(datasourceProperties) @@ -167,9 +169,10 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { |request_args, |create_time, |engine_type, - |cluster_manager + |cluster_manager, + |priority |) - |VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + |VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) |""".stripMargin JdbcUtils.withConnection { connection => @@ -190,7 +193,8 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { valueAsString(metadata.requestArgs), metadata.createTime, Option(metadata.engineType).map(_.toUpperCase(Locale.ROOT)).orNull, - metadata.clusterManager.orNull) + metadata.clusterManager.orNull, + metadata.priority) } } @@ -198,7 +202,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { JdbcUtils.executeQueryWithRowMapper( s"""SELECT identifier FROM $METADATA_TABLE |WHERE state=? - |ORDER BY create_time ASC LIMIT 1 + |ORDER BY ${if (priorityEnabled) "priority DESC, " else ""}create_time ASC LIMIT 1 |""".stripMargin) { stmt => stmt.setString(1, OperationState.INITIALIZED.toString) } { resultSet => diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala index dd5d741382f..292cf417483 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala @@ -93,4 +93,16 @@ object JDBCMetadataStoreConf { .serverOnly .stringConf .createWithDefault("") + + val METADATA_STORE_JDBC_PRIORITY_ENABLED: ConfigEntry[Boolean] = + buildConf("kyuubi.metadata.store.jdbc.priority.enabled") + .doc("Whether to enable the priority scheduling for batch impl v2. " + + "When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy " + + "for batch job scheduling. Note: this feature may cause significant performance issues " + + "when using MySQL 5.7 as the metastore backend due to the lack of support " + + "for mixed order index. 
See more details at KYUUBI #5329.") + .version("1.8.0") + .serverOnly + .booleanConf + .createWithDefault(false) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala index 8e4c5137fbf..8489e6d307b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala @@ -23,6 +23,7 @@ import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_BATCH_PRIORITY import org.apache.kyuubi.engine.KyuubiApplicationManager import org.apache.kyuubi.engine.spark.SparkProcessBuilder import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} @@ -181,7 +182,8 @@ class KyuubiBatchSession( requestArgs = batchArgs, createTime = createTime, engineType = batchType, - clusterManager = batchJobSubmissionOp.builder.clusterManager()) + clusterManager = batchJobSubmissionOp.builder.clusterManager(), + priority = conf.get(KYUUBI_BATCH_PRIORITY).map(_.toInt).getOrElse(10)) // there is a chance that operation failed w/ duplicated key error sessionManager.insertMetadata(newMetadata) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala index 8d323469959..02a3ee32c7c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala @@ -30,7 +30,7 @@ import org.apache.kyuubi.client.api.v1.dto.{Batch, BatchRequest} import org.apache.kyuubi.client.util.BatchUtils.KYUUBI_BATCH_ID_KEY import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ -import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_REAL_USER_KEY +import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_BATCH_PRIORITY, KYUUBI_SESSION_REAL_USER_KEY} import org.apache.kyuubi.credentials.HadoopCredentialsManager import org.apache.kyuubi.engine.KyuubiApplicationManager import org.apache.kyuubi.metrics.MetricsConstants._ @@ -237,7 +237,8 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { requestConf = conf, requestArgs = batchRequest.getArgs.asScala.toSeq, createTime = System.currentTimeMillis(), - engineType = batchRequest.getBatchType) + engineType = batchRequest.getBatchType, + priority = conf.get(KYUUBI_BATCH_PRIORITY).map(_.toInt).getOrElse(10)) // there is a chance that operation failed w/ duplicated key error metadataManager.foreach(_.insertMetadata(metadata, asyncRetryOnError = false)) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala index 564b5ebe939..fe7fa586858 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/MetadataManagerSuite.scala @@ -28,7 +28,9 @@ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.metrics.MetricsConstants._ +import 
org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} +import org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreConf.METADATA_STORE_JDBC_PRIORITY_ENABLED import org.apache.kyuubi.session.SessionType class MetadataManagerSuite extends KyuubiFunSuite { @@ -142,6 +144,58 @@ class MetadataManagerSuite extends KyuubiFunSuite { } } + test("[KYUUBI #5328] Test MetadataManager#pickBatchForSubmitting in order") { + // build mock batch jobs + val mockKyuubiInstance = "mock_kyuubi_instance" + val time = System.currentTimeMillis() + val mockBatchJob1 = newMetadata( + identifier = "mock_batch_job_1", + state = OperationState.INITIALIZED.toString, + createTime = time + 10000, + // larger than default priority 10 + priority = 20) + val mockBatchJob2 = newMetadata( + identifier = "mock_batch_job_2", + state = OperationState.INITIALIZED.toString, + createTime = time) + val mockBatchJob3 = newMetadata( + identifier = "mock_batch_job_3", + state = OperationState.INITIALIZED.toString, + createTime = time + 5000) + + withMetadataManager(Map(METADATA_STORE_JDBC_PRIORITY_ENABLED.key -> "true")) { + metadataManager => + metadataManager.insertMetadata(mockBatchJob1, asyncRetryOnError = false) + metadataManager.insertMetadata(mockBatchJob2, asyncRetryOnError = false) + metadataManager.insertMetadata(mockBatchJob3, asyncRetryOnError = false) + + // pick the highest priority batch job + val metadata1 = metadataManager.pickBatchForSubmitting(mockKyuubiInstance) + assert(metadata1.exists(m => m.identifier === "mock_batch_job_1")) + + // pick the oldest batch job when same priority + val metadata2 = metadataManager.pickBatchForSubmitting(mockKyuubiInstance) + assert(metadata2.exists(m => m.identifier === "mock_batch_job_2")) + + val metadata3 = metadataManager.pickBatchForSubmitting(mockKyuubiInstance) + assert(metadata3.exists(m => m.identifier === "mock_batch_job_3")) + } + + withMetadataManager(Map(METADATA_STORE_JDBC_PRIORITY_ENABLED.key -> "false")) { + metadataManager => + metadataManager.insertMetadata(mockBatchJob1, asyncRetryOnError = false) + metadataManager.insertMetadata(mockBatchJob2, asyncRetryOnError = false) + metadataManager.insertMetadata(mockBatchJob3, asyncRetryOnError = false) + + // pick the oldest batch job + val metadata2 = metadataManager.pickBatchForSubmitting(mockKyuubiInstance) + assert(metadata2.exists(m => m.identifier === "mock_batch_job_2")) + + val metadata3 = metadataManager.pickBatchForSubmitting(mockKyuubiInstance) + assert(metadata3.exists(m => m.identifier === "mock_batch_job_3")) + } + } + private def withMetadataManager( confOverlay: Map[String, String], newMetadataMgr: () => MetadataManager = () => new MetadataManager())( @@ -169,22 +223,27 @@ class MetadataManagerSuite extends KyuubiFunSuite { } } - private def newMetadata(): Metadata = { + private def newMetadata( + identifier: String = UUID.randomUUID().toString, + state: String = OperationState.PENDING.toString, + createTime: Long = System.currentTimeMillis(), + priority: Int = 10): Metadata = { Metadata( - identifier = UUID.randomUUID().toString, + identifier = identifier, sessionType = SessionType.BATCH, realUser = "kyuubi", username = "kyuubi", ipAddress = "127.0.0.1", kyuubiInstance = "localhost:10009", - state = "PENDING", + state = state, resource = "intern", className = "org.apache.kyuubi.SparkWC", requestName = "kyuubi_batch", requestConf = Map("spark.master" -> "local"), requestArgs = Seq("100"), - createTime = System.currentTimeMillis(), + createTime 
= createTime, engineType = "spark", + priority = priority, clusterManager = Some("local")) } } From c60f5b7e8872efb15597b962da1873a4ba2e71ef Mon Sep 17 00:00:00 2001 From: fwang12 Date: Mon, 16 Oct 2023 23:46:55 +0800 Subject: [PATCH 14/33] [KYUUBI #5196][FOLLOWUP] Extract spark core scala version lazily and respect engine env ### _Why are the changes needed?_ Only extract the spark core scala version if `SPARK_SCALA_VERSION` env is empty, and respect engine env. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5434 from turboFei/lazy_scala_version. Closes #5196 fdccef77b [fwang12] lazy extract spark core scala version Authored-by: fwang12 Signed-off-by: fwang12 --- .../org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index 02f4064afc6..afc96fb5ea0 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -108,9 +108,9 @@ class SparkProcessBuilder( } override protected val engineScalaBinaryVersion: String = { - val sparkCoreScalaVersion = + env.get("SPARK_SCALA_VERSION").filter(StringUtils.isNotBlank).getOrElse { extractSparkCoreScalaVersion(Paths.get(sparkHome, "jars").toFile.list()) - StringUtils.defaultIfBlank(System.getenv("SPARK_SCALA_VERSION"), sparkCoreScalaVersion) + } } override protected lazy val engineHomeDirFilter: FileFilter = file => { From f75e4acf6ceee31a74e57b41cd431ee65f184376 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 17 Oct 2023 11:16:02 +0800 Subject: [PATCH 15/33] [KYUUBI #5417] Authz should not check dependent subquery plan privilege ### _Why are the changes needed?_ Fix #5417 If there is is a view with subquery, authz will still request this subquery's interval privilege, it's not we want. For view ``` CREATE VIEW db.view1 AS WITH temp AS ( SELECT max(scope) max_scope FROM db1.table1) SELECT id as new_id FROM db1.table2 WHERE scope = (SELECT max_scope FROM temp) ``` When we query the view ``` SEELCT * FROM db.view1 ``` Before this pr, since spark will first execute subquery, it will first request `[default/table1/scope]` then request `[default/view1/new_id]` after this pr, it only request `[default/view1/new_id]` ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5418 from AngersZhuuuu/KYUUBI-5417. 
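Conceptually, the fix wraps the plans of subquery expressions found under a resolved permanent view with the same view marker, so the privilege builder treats them as already covered by the view's own check instead of requesting privileges on the tables they reference. A simplified sketch of that Catalyst pattern, where `markAsView` is a hypothetical stand-in for the marker wrapping (not the exact Kyuubi API):

```scala
import org.apache.spark.sql.catalyst.expressions.ScalarSubquery
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Simplified sketch: rewrap every scalar-subquery plan under a resolved permanent view
// so that downstream privilege building skips the tables referenced only by the subquery.
def markSubqueries(viewPlan: LogicalPlan)(markAsView: LogicalPlan => LogicalPlan): LogicalPlan =
  viewPlan.transformAllExpressions {
    case s: ScalarSubquery => s.copy(plan = markAsView(s.plan))
  }
```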
Closes #5417 e2669faea [Angerszhuuuu] Update tableExtractors.scala bc72cfc57 [Angerszhuuuu] Update RuleApplyPermanentViewMarker.scala 1731b9317 [Angerszhuuuu] Update RuleEliminateViewMarker.scala 282999ee2 [Angerszhuuuu] follow comment 6b37aaa7f [Angerszhuuuu] Update RuleApplyPermanentViewMarker.scala d03354d58 [Angerszhuuuu] Revert "update" 7a96627e4 [Angerszhuuuu] update 78a32b3a5 [Angerszhuuuu] follow comment 79e07ab24 [Angerszhuuuu] Update PrivilegesBuilder.scala 518c2b394 [Angerszhuuuu] update d033624ea [Angerszhuuuu] update 54ff954f0 [Angerszhuuuu] update. 1119f78f6 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala 1918381ac [Angerszhuuuu] Add UT 7723f9002 [Angerszhuuuu] [KYUUBI #5417]Authz will still check source table when persist view contains a subquery Authored-by: Angerszhuuuu Signed-off-by: Kent Yao --- .../ranger/RuleApplyPermanentViewMarker.scala | 10 ++++- .../spark/authz/serde/tableExtractors.scala | 12 +++-- .../authz/util/RuleEliminateViewMarker.scala | 7 ++- .../ranger/RangerSparkExtensionSuite.scala | 44 +++++++++++++++++++ 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala index 424df7e0b43..91741080759 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.ranger +import org.apache.spark.sql.catalyst.expressions.ScalarSubquery import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View} import org.apache.spark.sql.catalyst.rules.Rule @@ -36,7 +37,14 @@ class RuleApplyPermanentViewMarker extends Rule[LogicalPlan] { plan mapChildren { case p: PermanentViewMarker => p case permanentView: View if hasResolvedPermanentView(permanentView) => - PermanentViewMarker(permanentView, permanentView.desc) + val resolvedSubquery = permanentView.transformAllExpressions { + case scalarSubquery: ScalarSubquery => + // TODO: Currently, we do not do an auth check in the subquery + // as the main query part also secures it. But for performance consideration, + // we also pre-check it in subqueries and fail fast with negative privileges. 
+ scalarSubquery.copy(plan = PermanentViewMarker(scalarSubquery.plan, null)) + } + PermanentViewMarker(resolvedSubquery, resolvedSubquery.desc) case other => apply(other) } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala index 94641d6d060..57eab9634f7 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala @@ -91,10 +91,14 @@ class TableIdentifierTableExtractor extends TableExtractor { */ class CatalogTableTableExtractor extends TableExtractor { override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { - val catalogTable = v1.asInstanceOf[CatalogTable] - val identifier = catalogTable.identifier - val owner = Option(catalogTable.owner).filter(_.nonEmpty) - Some(Table(None, identifier.database, identifier.table, owner)) + if (null == v1) { + None + } else { + val catalogTable = v1.asInstanceOf[CatalogTable] + val identifier = catalogTable.identifier + val owner = Option(catalogTable.owner).filter(_.nonEmpty) + Some(Table(None, identifier.database, identifier.table, owner)) + } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala index 9bda84a035d..8044f1283e5 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.util +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule @@ -25,6 +26,10 @@ import org.apache.spark.sql.catalyst.rules.Rule */ class RuleEliminateViewMarker extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - plan.transformUp { case pvm: PermanentViewMarker => pvm.child } + plan.transformUp { + case pvm: PermanentViewMarker => pvm.child.transformAllExpressions { + case s: SubqueryExpression => s.withNewPlan(apply(s.plan)) + } + } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index a4148d9a542..d109a7f2b07 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -747,4 +747,48 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { assert(e.getMessage.contains(s"does not have [select] privilege on [$db1/$table/id]")) } } + + test("[KYUUBI #5417] should not check dependent subquery plan privilege") { + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + val view1 = 
"view1" + withCleanTmpResources( + Seq((s"$db1.$table1", "table"), (s"$db1.$table2", "table"), (s"$db1.$view1", "view"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table2 (id int, scope int)")) + + val e1 = intercept[AccessControlException] { + doAs( + someone, + sql( + s""" + |WITH temp AS ( + | SELECT max(scope) max_scope + | FROM $db1.$table1) + |SELECT id as new_id FROM $db1.$table2 + |WHERE scope = (SELECT max_scope FROM temp) + |""".stripMargin).show()) + } + // Will first check subquery privilege. + assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table1/scope]")) + + doAs( + admin, + sql( + s""" + |CREATE VIEW $db1.$view1 + |AS + |WITH temp AS ( + | SELECT max(scope) max_scope + | FROM $db1.$table1) + |SELECT id as new_id FROM $db1.$table2 + |WHERE scope = (SELECT max_scope FROM temp) + |""".stripMargin)) + // Will just check permanent view privilege. + val e2 = intercept[AccessControlException]( + doAs(someone, sql(s"SELECT * FROM $db1.$view1".stripMargin).show())) + assert(e2.getMessage.contains(s"does not have [select] privilege on [$db1/$view1/new_id]")) + } + } } From dcaacc3ed569e419faf70aae563e44c5130d014f Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Tue, 17 Oct 2023 13:04:15 +0800 Subject: [PATCH 16/33] [KYUUBI #4916] [AUTHZ] Support ReplaceData and compatible Spark 3.4 and 3.5 ### _Why are the changes needed?_ - Iceberg 1.3.0 use Spark's `ReplaceData` since 3.4 as Logical plan for "DELETE FROM", instead of Iceberg's `DeleteFromTable` in Spark3.4 - Requiring select privilege for input table, even it's the same with output table - compatible Spark 3.4 and 3.5, subquies in the plan - enable iceberg test for authz plugin on Spark 3.4 and 3.5 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before make a pull request Closes #4916 from bowenliang123/authz-replacedata. 
Closes #4916 658917752 [Bowen Liang] Merge branch 'master' into authz-replacedata 17f5d84dd [liangbowen] update 25009908a [Bowen Liang] ut of user with select only for DELETE FROM fa77cea15 [Bowen Liang] ut fd9bb8f3a [Bowen Liang] update f9cfb98a9 [Bowen Liang] assertion for Spark 3.5 f574e0da3 [Bowen Liang] assertion for Spark 3.5 2449c27de [Bowen Liang] bring back single call ut for someone 78786988d [Bowen Liang] bring back single call ut for someone b8e4a6319 [wforget] fix style 0e26c08b4 [wforget] fix IcebergCatalogRangerSparkExtensionSuite with spark-3.5 02781781f [wforget] enable iceberg tests in spark-3.5 215e1b861 [wforget] fix TableCommands d019b1632 [wforget] followup ae17e076b [wforget] Merge remote-tracking branch 'origin/master' into authz-replacedata b88f77355 [Bowen Liang] update febcb3ee5 [Bowen Liang] isSparkV34OrGreater 91d41b438 [Bowen Liang] replace data Lead-authored-by: Bowen Liang Co-authored-by: wforget <643348094@qq.com> Co-authored-by: Bowen Liang Co-authored-by: liangbowen Signed-off-by: Bowen Liang --- .../main/resources/table_command_spec.json | 21 +++++++ ...IcebergCatalogPrivilegesBuilderSuite.scala | 37 ++++++++++-- .../authz/V2CommandsPrivilegesSuite.scala | 12 +++- .../spark/authz/gen/TableCommands.scala | 12 ++++ ...bergCatalogRangerSparkExtensionSuite.scala | 59 ++++++++++++------- pom.xml | 4 +- 6 files changed, 117 insertions(+), 28 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 2febac11b3e..513259e1375 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -470,6 +470,27 @@ } ], "opType" : "ALTERTABLE_REPLACECOLS", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceData", + "tableDescs" : [ { + "fieldName" : "originalTable", + "fieldExtractor" : "DataSourceV2RelationTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "QUERY", + "queryDescs" : [ { + "fieldName" : "query", + "fieldExtractor" : "LogicalPlanQueryExtractor" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceTable", "tableDescs" : [ { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala index 39966af916a..b8d51bc2cc9 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/IcebergCatalogPrivilegesBuilderSuite.scala @@ -22,6 +22,7 @@ import org.scalatest.Outcome import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.OperationType._ import org.apache.kyuubi.plugin.spark.authz.ranger.AccessType +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.tags.IcebergTest import org.apache.kyuubi.util.AssertionUtils._ @@ -57,7 +58,15 @@ class IcebergCatalogPrivilegesBuilderSuite extends 
V2CommandsPrivilegesSuite { val plan = sql(s"DELETE FROM $catalogTable WHERE key = 1 ").queryExecution.analyzed val (inputs, outputs, operationType) = PrivilegesBuilder.build(plan, spark) assert(operationType === QUERY) - assert(inputs.isEmpty) + if (isSparkV34OrGreater) { + assert(inputs.size === 1) + val po = inputs.head + assertEqualsIgnoreCase(namespace)(po.dbname) + assertEqualsIgnoreCase(catalogTableShort)(po.objectName) + assertContains(po.columns, "key", "value") + } else { + assert(inputs.size === 0) + } assert(outputs.size === 1) val po = outputs.head assert(po.actionType === PrivilegeObjectActionType.UPDATE) @@ -74,7 +83,15 @@ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { val plan = sql(s"UPDATE $catalogTable SET value = 'b' WHERE key = 1 ").queryExecution.analyzed val (inputs, outputs, operationType) = PrivilegesBuilder.build(plan, spark) assert(operationType === QUERY) - assert(inputs.isEmpty) + if (isSparkV35OrGreater) { + assert(inputs.size === 1) + val po = inputs.head + assertEqualsIgnoreCase(namespace)(po.dbname) + assertEqualsIgnoreCase(catalogTableShort)(po.objectName) + assertContains(po.columns, "key", "value") + } else { + assert(inputs.size === 0) + } assert(outputs.size === 1) val po = outputs.head assert(po.actionType === PrivilegeObjectActionType.UPDATE) @@ -98,8 +115,20 @@ class IcebergCatalogPrivilegesBuilderSuite extends V2CommandsPrivilegesSuite { s"WHEN NOT MATCHED THEN INSERT *").queryExecution.analyzed val (inputs, outputs, operationType) = PrivilegesBuilder.build(plan, spark) assert(operationType === QUERY) - assert(inputs.size === 1) - val po0 = inputs.head + if (isSparkV35OrGreater) { + assert(inputs.size === 2) + val po = inputs.head + assert(po.actionType === PrivilegeObjectActionType.OTHER) + assert(po.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) + assertEqualsIgnoreCase(namespace)(po.dbname) + assertEqualsIgnoreCase(table)(po.objectName) + assertContains(po.columns, "key", "value") + // The properties of RowLevelOperationTable are empty, so owner is none + assert(po.owner.isEmpty) + } else { + assert(inputs.size === 1) + } + val po0 = inputs.last assert(po0.actionType === PrivilegeObjectActionType.OTHER) assert(po0.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) assertEqualsIgnoreCase(namespace)(po0.dbname) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala index 3ebea1ce9d9..149c9ba8f6b 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala @@ -161,7 +161,11 @@ abstract class V2CommandsPrivilegesSuite extends PrivilegesBuilderSuite { assertEqualsIgnoreCase(namespace)(po.dbname) assertEqualsIgnoreCase(table)(po.objectName) assert(po.columns.isEmpty) - assert(po.owner.isEmpty) + if (isSparkV34OrGreater) { + checkV2TableOwner(po) + } else { + assert(po.owner.isEmpty) + } val accessType = AccessType(po, operationType, isInput = false) assert(accessType === AccessType.CREATE) } @@ -193,7 +197,11 @@ abstract class V2CommandsPrivilegesSuite extends PrivilegesBuilderSuite { assertEqualsIgnoreCase(namespace)(po.dbname) assertEqualsIgnoreCase(table)(po.objectName) assert(po.columns.isEmpty) - 
assert(po.owner.isEmpty) + if (isSparkV34OrGreater) { + checkV2TableOwner(po) + } else { + assert(po.owner.isEmpty) + } val accessType = AccessType(po, operationType, isInput = false) assert(accessType === AccessType.CREATE) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala index 6a6800210dc..cf73cfbc61b 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala @@ -257,6 +257,17 @@ object TableCommands { TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc)) } + val ReplaceData = { + val cmd = "org.apache.spark.sql.catalyst.plans.logical.ReplaceData" + val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE)) + val tableDesc = + TableDesc( + "originalTable", + classOf[DataSourceV2RelationTableExtractor], + actionTypeDesc = Some(actionTypeDesc)) + TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc)) + } + val UpdateTable = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.UpdateTable" val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE)) @@ -655,6 +666,7 @@ object TableCommands { RefreshTable, RefreshTableV2, RefreshTable3d0, + ReplaceData, ShowColumns, ShowCreateTable, ShowCreateTable.copy(classname = diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala index 49288055331..28e13aff3c0 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala @@ -47,6 +47,8 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite val namespace1 = icebergNamespace val table1 = "table1" val outputTable1 = "outputTable1" + val bobNamespace = "default_bob" + val bobSelectTable = "table_select_bob_1" override def withFixture(test: NoArgTest): Outcome = { test() @@ -77,6 +79,11 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite admin, sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$namespace1.$outputTable1" + " (id int, name string, city string) USING iceberg")) + + doAs( + admin, + sql(s"CREATE TABLE IF NOT EXISTS $catalogV2.$bobNamespace.$bobSelectTable" + + " (id int, name string, city string) USING iceberg")) } override def afterAll(): Unit = { @@ -88,7 +95,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite test("[KYUUBI #3515] MERGE INTO") { val mergeIntoSql = s""" - |MERGE INTO $catalogV2.$namespace1.$outputTable1 AS target + |MERGE INTO $catalogV2.$bobNamespace.$bobSelectTable AS target |USING $catalogV2.$namespace1.$table1 AS source |ON target.id = source.id |WHEN MATCHED AND (target.name='delete') THEN DELETE @@ -104,14 +111,19 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite s" on [$namespace1/$table1/id]")) withSingleCallEnabled { - val e2 = intercept[AccessControlException]( - 
doAs( - someone, - sql(mergeIntoSql))) - assert(e2.getMessage.contains(s"does not have" + - s" [select] privilege" + - s" on [$namespace1/$table1/id,$namespace1/table1/name,$namespace1/$table1/city]," + - s" [update] privilege on [$namespace1/$outputTable1]")) + interceptContains[AccessControlException](doAs(someone, sql(mergeIntoSql)))( + if (isSparkV35OrGreater) { + s"does not have [select] privilege on [$namespace1/table1/id" + + s",$namespace1/$table1/name,$namespace1/$table1/city]" + } else { + "does not have " + + s"[select] privilege on [$namespace1/$table1/id,$namespace1/$table1/name,$namespace1/$table1/city]," + + s" [update] privilege on [$bobNamespace/$bobSelectTable]" + }) + + interceptContains[AccessControlException] { + doAs(bob, sql(mergeIntoSql)) + }(s"does not have [update] privilege on [$bobNamespace/$bobSelectTable]") } doAs(admin, sql(mergeIntoSql)) @@ -119,13 +131,13 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite test("[KYUUBI #3515] UPDATE TABLE") { // UpdateTable - val e1 = intercept[AccessControlException]( - doAs( - someone, - sql(s"UPDATE $catalogV2.$namespace1.$table1 SET city='Guangzhou' " + - " WHERE id=1"))) - assert(e1.getMessage.contains(s"does not have [update] privilege" + - s" on [$namespace1/$table1]")) + interceptContains[AccessControlException] { + doAs(someone, sql(s"UPDATE $catalogV2.$namespace1.$table1 SET city='Guangzhou' WHERE id=1")) + }(if (isSparkV35OrGreater) { + s"does not have [select] privilege on [$namespace1/$table1/id]" + } else { + s"does not have [update] privilege on [$namespace1/$table1]" + }) doAs( admin, @@ -135,10 +147,17 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite test("[KYUUBI #3515] DELETE FROM TABLE") { // DeleteFromTable - val e6 = intercept[AccessControlException]( - doAs(someone, sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=2"))) - assert(e6.getMessage.contains(s"does not have [update] privilege" + - s" on [$namespace1/$table1]")) + interceptContains[AccessControlException] { + doAs(someone, sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=2")) + }(if (isSparkV34OrGreater) { + s"does not have [select] privilege on [$namespace1/$table1/id]" + } else { + s"does not have [update] privilege on [$namespace1/$table1]" + }) + + interceptContains[AccessControlException] { + doAs(bob, sql(s"DELETE FROM $catalogV2.$bobNamespace.$bobSelectTable WHERE id=2")) + }(s"does not have [update] privilege on [$bobNamespace/$bobSelectTable]") doAs(admin, sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=2")) } diff --git a/pom.xml b/pom.xml index 5773eda53bf..65498ba4954 100644 --- a/pom.xml +++ b/pom.xml @@ -2247,7 +2247,7 @@ 2.4.0 3.4.1 3.4 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest + org.scalatest.tags.Slow @@ -2262,7 +2262,7 @@ 3.4 3.5.0 3.5 - org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest + org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.PySparkTest From 47555eb90098d63f5b0be28f33fc23911edfa0c8 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Tue, 17 Oct 2023 13:09:18 +0800 Subject: [PATCH 17/33] [KYUUBI #5414][KSHC] Reader should not pollut the global hiveConf instance ### _Why are the changes needed?_ This pr aims to fix https://github.com/apache/kyuubi/issues/5414. 
`HiveReader` initialization incorrectly uses the global hadoopConf as hiveconf, which causes reader to pollut the global hadoopConf and cause job read failure. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5424 from Yikf/orc-read. Closes #5414 d6bdf7be4 [yikaifei] [KYUUBI #5414] Reader should not polluted the global hiveconf instance Authored-by: yikaifei Signed-off-by: Cheng Pan --- .../spark/connector/hive/read/HiveScan.scala | 2 +- .../spark/connector/hive/HiveQuerySuite.scala | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala index 0b79d730751..ecdfc76c5f9 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala @@ -64,7 +64,7 @@ case class HiveScan( } override def createReaderFactory(): PartitionReaderFactory = { - val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration() + val hiveConf = new Configuration(fileIndex.hiveCatalog.hadoopConfiguration()) addCatalogTableConfToConf(hiveConf, catalogTable) val table = HiveClientImpl.toHiveTable(catalogTable) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala index 1d3d5ae10aa..0dd1efdec97 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala @@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest { } } + test("[KYUUBI #5414] Reader should not polluted the global hiveconf instance") { + withSparkSession() { spark => + val table = "hive.default.hiveconf_test" + withTempPartitionedTable(spark, table, "ORC", hiveTable = true) { + spark.sql( + s""" + | INSERT OVERWRITE + | $table PARTITION(year = '2022') + | VALUES("yi", "08") + |""".stripMargin).collect() + + checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08"))) + checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1))) + } + } + } + test("Partitioned table insert and static partition value is empty string") { withSparkSession() { spark => val table = "hive.default.employee" From fdd8f1729a8164e7a4dc49f5acaf0a96061e4561 Mon Sep 17 00:00:00 2001 From: lawulu Date: Tue, 17 Oct 2023 14:41:28 +0800 Subject: [PATCH 18/33] [KYUUBI #5441] Make the configuration kyuubi.zookeeper.embedded.data.log.dir effective ### _Why are the changes needed?_ The configuration `kyuubi.zookeeper.embedded.data.log.dir` exists, but it is not used by the `EmbeddedZookeeper`, it is ineffective ### _How was this patch 
tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5440 from biangjuang/zk-log-dir. Closes #5441 784619cda [lawulu] Make the configuration kyuubi.zookeeper.embedded.data.log.dir effective Authored-by: lawulu Signed-off-by: Cheng Pan --- .../org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala index 4ea63d78259..17caffedff6 100644 --- a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala +++ b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala @@ -31,12 +31,14 @@ class EmbeddedZookeeper extends AbstractService("EmbeddedZookeeper") { private var zks: ZooKeeperServer = _ private var serverFactory: NIOServerCnxnFactory = _ private var dataDirectory: File = _ + private var dataLogDirectory: File = _ // TODO: Is it right in prod? private val deleteDataDirectoryOnClose = true private var host: String = _ override def initialize(conf: KyuubiConf): Unit = synchronized { dataDirectory = new File(conf.get(ZK_DATA_DIR)) + dataLogDirectory = new File(conf.get(ZK_DATA_LOG_DIR)) val clientPort = conf.get(ZK_CLIENT_PORT) val tickTime = conf.get(ZK_TICK_TIME) val maxClientCnxns = conf.get(ZK_MAX_CLIENT_CONNECTIONS) @@ -51,7 +53,7 @@ class EmbeddedZookeeper extends AbstractService("EmbeddedZookeeper") { } try { - zks = new ZooKeeperServer(dataDirectory, dataDirectory, tickTime) + zks = new ZooKeeperServer(dataDirectory, dataLogDirectory, tickTime) zks.setMinSessionTimeout(minSessionTimeout) zks.setMaxSessionTimeout(maxSessionTimeout) @@ -79,7 +81,10 @@ class EmbeddedZookeeper extends AbstractService("EmbeddedZookeeper") { if (getServiceState == ServiceState.STARTED) { if (null != serverFactory) serverFactory.shutdown() if (null != zks) zks.shutdown() - if (deleteDataDirectoryOnClose) deleteDirectoryRecursively(dataDirectory) + if (deleteDataDirectoryOnClose) { + deleteDirectoryRecursively(dataDirectory) + deleteDirectoryRecursively(dataLogDirectory) + } } super.stop() } From f7143273a52b1b0735108ef1733781820098aa99 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Tue, 17 Oct 2023 16:24:14 +0800 Subject: [PATCH 19/33] [KYUUBI #5443] Add kyuubi-kubernetes-it module to reformat script ### _Why are the changes needed?_ - With this PR, the code of module `kyuubi-kubernetes-it` will be auto-reformated by running `dev/reformat` script. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No. Closes #5443 from bowenliang123/reformat-k8sit. 
Closes #5443 39a323efc [Bowen Liang] reformat k8s it module Authored-by: Bowen Liang Signed-off-by: Bowen Liang --- dev/reformat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/reformat b/dev/reformat index 31e8f49ad21..7ad26ae2e17 100755 --- a/dev/reformat +++ b/dev/reformat @@ -20,7 +20,7 @@ set -x KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)" -PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.5,spark-3.4,spark-3.3,spark-3.2,spark-3.1,tpcds" +PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.5,spark-3.4,spark-3.3,spark-3.2,spark-3.1,tpcds,kubernetes-it" # python style checks rely on `black` in path if ! command -v black &> /dev/null From f6ccc4d80fc925d51540595c3b18556c220dd70e Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 17 Oct 2023 17:33:07 +0800 Subject: [PATCH 20/33] [KYUUBI #5359] [AUTHZ] Support Create Table Commands for Hudi ### _Why are the changes needed?_ To close #5359. Kyuubi authz support hudi create table commands - [CreateHoodieTableCommand](https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala): https://hudi.apache.org/docs/sql_ddl#create-table - [CreateHoodieTableAsSelectCommand](https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala): https://hudi.apache.org/docs/sql_ddl#create-table-as-select-ctas - [CreateHoodieTableLikeCommand](https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableLikeCommand.scala): https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableLikeCommand.scala ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5439 from AngersZhuuuu/KYUUBI-5359. 
Closes #5359 d07010913 [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala f0bc79a77 [Angerszhuuuu] [KYUUBI #5284] Support Hudi Creeate Table Command in Authz Authored-by: Angerszhuuuu Signed-off-by: Cheng Pan --- .../main/resources/table_command_spec.json | 54 +++++++++++++ .../plugin/spark/authz/gen/HudiCommands.scala | 33 +++++++- ...HudiCatalogRangerSparkExtensionSuite.scala | 77 +++++++++++++++++++ 3 files changed, 163 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 513259e1375..913fef016ac 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -1513,6 +1513,60 @@ } ], "opType" : "ALTERTABLE_PROPERTIES", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "CREATETABLE_AS_SELECT", + "queryDescs" : [ { + "fieldName" : "query", + "fieldExtractor" : "LogicalPlanQueryExtractor" + } ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "CREATETABLE", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableLikeCommand", + "tableDescs" : [ { + "fieldName" : "targetTable", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : true + }, { + "fieldName" : "sourceTable", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : true, + "setCurrentDatabaseIfMissing" : true + } ], + "opType" : "CREATETABLE", + "queryDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand", "tableDescs" : [ { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala index 6e3237d2a72..c4488edbf66 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala @@ -72,11 +72,42 @@ object HudiCommands { TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) } + val CreateHoodieTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CreateHoodieTableCommand" + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE) + } + + val CreateHoodieTableAsSelectCommand = { + val cmd = 
"org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand" + CreateHoodieTableCommand.copy( + classname = cmd, + opType = CREATETABLE_AS_SELECT, + queryDescs = Seq(QueryDesc("query"))) + } + + val CreateHoodieTableLikeCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CreateHoodieTableLikeCommand" + val tableDesc1 = TableDesc( + "targetTable", + classOf[TableIdentifierTableExtractor], + setCurrentDatabaseIfMissing = true) + val tableDesc2 = TableDesc( + "sourceTable", + classOf[TableIdentifierTableExtractor], + isInput = true, + setCurrentDatabaseIfMissing = true) + TableCommandSpec(cmd, Seq(tableDesc1, tableDesc2), CREATETABLE) + } + val data: Array[TableCommandSpec] = Array( AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, AlterHoodieTableDropPartitionCommand, AlterHoodieTableRenameCommand, AlterTableCommand, - Spark31AlterTableCommand) + Spark31AlterTableCommand, + CreateHoodieTableCommand, + CreateHoodieTableAsSelectCommand, + CreateHoodieTableLikeCommand) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala index 8fcae6cf941..ac1e357a22f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala @@ -129,4 +129,81 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { s"does not have [alter] privilege on [$namespace1/$table1]") } } + + test("CreateHoodieTableCommand") { + withCleanTmpResources(Seq((namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)))(s"does not have [create] privilege on [$namespace1/$table1]") + } + } + + test("CreateHoodieTableAsSelectCommand") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table2 + |USING HUDI + |AS + |SELECT id FROM $namespace1.$table1 + |""".stripMargin)))(s"does not have [select] privilege on [$namespace1/$table1/id]") + } + } + + test("CreateHoodieTableLikeCommand") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 
'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table2 + |LIKE $namespace1.$table1 + |USING HUDI + |""".stripMargin)))(s"does not have [select] privilege on [$namespace1/$table1]") + } + } } From 8f6b15c91774264fcafe71b38d29fb81807e4df7 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 17 Oct 2023 22:30:58 +0800 Subject: [PATCH 21/33] [KYUUBI #5417] should not check in-subquery in permanent view ### _Why are the changes needed?_ Fix #5417 If there is a view with an in-subquery, authz will still request the in-subquery's internal privileges, which is not what we want. For a view ``` CREATE VIEW db.view1 AS WITH temp AS ( SELECT max(scope) max_scope FROM db1.table1) SELECT id as new_id FROM db1.table2 WHERE scope in (SELECT max_scope FROM temp) ``` When we query the view ``` SELECT * FROM db.view1 ``` Before this PR, since Spark executes the subquery first, it first requests `[default/table1/scope]` and then `[default/view1/new_id]`. After this PR, it only requests `[default/view1/new_id]`. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ no Closes #5442 from AngersZhuuuu/KYUUBI-5417-FOLLOWUP. Closes #5417 6919903cb [Angerszhuuuu] Update RuleApplyPermanentViewMarker.scala 5097d8059 [Angerszhuuuu] [KYUUBI #5417] should not check in-subquery in permanent view Authored-by: Angerszhuuuu Signed-off-by: Kent Yao --- .../ranger/RuleApplyPermanentViewMarker.scala | 6 +-- .../ranger/RangerSparkExtensionSuite.scala | 46 ++++++++++++++++++- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala index 91741080759..679b5d65dfe 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.ranger -import org.apache.spark.sql.catalyst.expressions.ScalarSubquery +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View} import org.apache.spark.sql.catalyst.rules.Rule @@ -38,11 +38,11 @@ class RuleApplyPermanentViewMarker extends Rule[LogicalPlan] { case p: PermanentViewMarker => p case permanentView: View if hasResolvedPermanentView(permanentView) => val resolvedSubquery = permanentView.transformAllExpressions { - case scalarSubquery: ScalarSubquery => + case subquery: SubqueryExpression => // TODO: Currently, we do not do an auth check in the subquery // as the main query part also secures it. But for performance consideration, // we also pre-check it in subqueries and fail fast with negative privileges.
- scalarSubquery.copy(plan = PermanentViewMarker(scalarSubquery.plan, null)) + subquery.withNewPlan(plan = PermanentViewMarker(subquery.plan, null)) } PermanentViewMarker(resolvedSubquery, resolvedSubquery.desc) case other => apply(other) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index d109a7f2b07..8e1fe058739 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -748,7 +748,7 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { } } - test("[KYUUBI #5417] should not check dependent subquery plan privilege") { + test("[KYUUBI #5417] should not check scalar-subquery in permanent view") { val db1 = defaultDb val table1 = "table1" val table2 = "table2" @@ -791,4 +791,48 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { assert(e2.getMessage.contains(s"does not have [select] privilege on [$db1/$view1/new_id]")) } } + + test("[KYUUBI #5417] should not check in-subquery in permanent view") { + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + val view1 = "view1" + withCleanTmpResources( + Seq((s"$db1.$table1", "table"), (s"$db1.$table2", "table"), (s"$db1.$view1", "view"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table2 (id int, scope int)")) + + val e1 = intercept[AccessControlException] { + doAs( + someone, + sql( + s""" + |WITH temp AS ( + | SELECT max(scope) max_scope + | FROM $db1.$table1) + |SELECT id as new_id FROM $db1.$table2 + |WHERE scope in (SELECT max_scope FROM temp) + |""".stripMargin).show()) + } + // Will first check subquery privilege. + assert(e1.getMessage.contains(s"does not have [select] privilege on [$db1/$table1/scope]")) + + doAs( + admin, + sql( + s""" + |CREATE VIEW $db1.$view1 + |AS + |WITH temp AS ( + | SELECT max(scope) max_scope + | FROM $db1.$table1) + |SELECT id as new_id FROM $db1.$table2 + |WHERE scope in (SELECT max_scope FROM temp) + |""".stripMargin)) + // Will just check permanent view privilege. + val e2 = intercept[AccessControlException]( + doAs(someone, sql(s"SELECT * FROM $db1.$view1".stripMargin).show())) + assert(e2.getMessage.contains(s"does not have [select] privilege on [$db1/$view1/new_id]")) + } + } } From 16752164a1e4f433241d7ee4735cf414c27f2a6d Mon Sep 17 00:00:00 2001 From: senmiaoliu Date: Wed, 18 Oct 2023 12:55:00 +0800 Subject: [PATCH 22/33] [KYUUBI #5408] MetadataManager tries MySQL 8 driver class first ### _Why are the changes needed?_ close #5408 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ NO Closes #5433 from lsm1/branch-kyuubi-5408. 
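Since the description above is brief: the idea is to prefer the MySQL Connector/J 8.x driver class when it is present on the classpath and to fall back to the legacy 5.x class otherwise, while an explicitly configured `kyuubi.metadata.store.jdbc.driver` still takes precedence. A rough sketch of that selection logic (method and value names here are illustrative; the real change lives in `JDBCMetadataStore` in the diff below):
```
// Sketch only: prefer the Connector/J 8.x class, fall back to the legacy 5.x class name.
def pickMysqlDriverClass(): String = {
  val preferred = "com.mysql.cj.jdbc.Driver" // MySQL Connector/J 8.x
  val legacy = "com.mysql.jdbc.Driver"       // MySQL Connector/J 5.x
  try {
    Class.forName(preferred)
    preferred
  } catch {
    case _: ClassNotFoundException => legacy
  }
}
```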
Closes #5408 8c6cc223d [senmiaoliu] fix style 20415dc44 [senmiaoliu] use com.mysql.cj.jdbc.Driver first Authored-by: senmiaoliu Signed-off-by: Shaoyun Chen --- docs/configuration/settings.md | 36 +++++++++---------- .../metadata/jdbc/JDBCMetadataStore.scala | 9 ++++- .../metadata/jdbc/JDBCMetadataStoreConf.scala | 3 +- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index d9d8d95efb1..711954ead07 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -332,24 +332,24 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Metadata -| Key | Default | Meaning | Type | Since | -|-------------------------------------------------|----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | -| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | -| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | -| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | -| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | -| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
    • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
    • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
    • MYSQL: MySQL, JDBC driver `com.mysql.jdbc.Driver`.
    • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
    • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | -| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. | string | 1.6.0 | +| Key | Default | Meaning | Type | Since | +|-------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | +| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | +| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | +| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | +| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. 
| int | 1.6.0 | +| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | +| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
      • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
      • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
      • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
      • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
      • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. | string | 1.6.0 | ### Metrics diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala index 419fa844750..9b1c89d779b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala @@ -40,16 +40,23 @@ import org.apache.kyuubi.server.metadata.jdbc.DatabaseType._ import org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreConf._ import org.apache.kyuubi.session.SessionType import org.apache.kyuubi.util.JdbcUtils +import org.apache.kyuubi.util.reflect.ReflectUtils class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { import JDBCMetadataStore._ private val dbType = DatabaseType.withName(conf.get(METADATA_STORE_JDBC_DATABASE_TYPE)) private val driverClassOpt = conf.get(METADATA_STORE_JDBC_DRIVER) + private lazy val mysqlDriverClass = + if (ReflectUtils.isClassLoadable("com.mysql.cj.jdbc.Driver")) { + "com.mysql.cj.jdbc.Driver" + } else { + "com.mysql.jdbc.Driver" + } private val driverClass = dbType match { case SQLITE => driverClassOpt.getOrElse("org.sqlite.JDBC") case DERBY => driverClassOpt.getOrElse("org.apache.derby.jdbc.AutoloadedDriver") - case MYSQL => driverClassOpt.getOrElse("com.mysql.jdbc.Driver") + case MYSQL => driverClassOpt.getOrElse(mysqlDriverClass) case CUSTOM => driverClassOpt.getOrElse( throw new IllegalArgumentException("No jdbc driver defined")) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala index 292cf417483..96a5539fb27 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala @@ -40,7 +40,8 @@ object JDBCMetadataStoreConf { "
      • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
      • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
-     • MYSQL: MySQL, JDBC driver `com.mysql.jdbc.Driver`.
+     • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
      • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
      • " + " Note that: The JDBC datasource is powered by HiKariCP, for datasource properties," + " please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource." + From c5854f74b2a69cd5f75b13a55a55e41ecf486ebb Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 18 Oct 2023 13:16:24 +0800 Subject: [PATCH 23/33] [KYUUBI #5361] [AUTHZ] Support Drop/Truncate Table Commands for Hudi ### _Why are the changes needed?_ To close #5361. Kyuubi authz support hudi drop/repair/truncate table commands - DropHoodieTableCommand: https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/DropHoodieTableCommand.scala - TruncateHoodieTableCommand: https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/TruncateHoodieTableCommand.scala ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5445 from AngersZhuuuu/KYUUBI-5361. Closes #5361 ed9d43acd [Angerszhuuuu] update a08dcaafc [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala 372d1fbed [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala e0aa8a783 [Bowen Liang] add positive cases 9daf0b4d0 [Bowen Liang] compact code style 7eb0828d7 [Bowen Liang] Update extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala 81024ac10 [Angerszhuuuu] update 31d440617 [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala fae5a64bc [Angerszhuuuu] Merge branch 'master' into KYUUBI-5361 a70617b18 [Angerszhuuuu] [KYUUBI #5361] [AUTHZ] Support Drop/Repair/Truncate Table Commands for Hudi Lead-authored-by: Angerszhuuuu Co-authored-by: Bowen Liang Co-authored-by: Bowen Liang Signed-off-by: Cheng Pan --- .../main/resources/table_command_spec.json | 35 +++++++++ .../plugin/spark/authz/gen/HudiCommands.scala | 31 +++++++- ...HudiCatalogRangerSparkExtensionSuite.scala | 72 +++++++++++++++++-- 3 files changed, 130 insertions(+), 8 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index 913fef016ac..abf4c314c0e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -1567,6 +1567,24 @@ } ], "opType" : "CREATETABLE", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.DropHoodieTableCommand", + "tableDescs" : [ { + "fieldName" : "tableIdentifier", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : { + "fieldName" : "tableIdentifier", + "fieldExtractor" : "TableIdentifierTableTypeExtractor", + "skipTypes" : [ "TEMP_VIEW" ] + }, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "DROPTABLE", + "queryDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand", "tableDescs" : [ { @@ -1581,4 +1599,21 @@ } ], "opType" : 
"ALTERTABLE_PROPERTIES", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.TruncateHoodieTableCommand", + "tableDescs" : [ { + "fieldName" : "tableIdentifier", + "fieldExtractor" : "TableIdentifierTableExtractor", + "columnDesc" : { + "fieldName" : "partitionSpec", + "fieldExtractor" : "PartitionOptionColumnExtractor" + }, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "TRUNCATETABLE", + "queryDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala index c4488edbf66..a5f65c3d014 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala @@ -100,6 +100,33 @@ object HudiCommands { TableCommandSpec(cmd, Seq(tableDesc1, tableDesc2), CREATETABLE) } + val DropHoodieTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.DropHoodieTableCommand" + val tableTypeDesc = + TableTypeDesc( + "tableIdentifier", + classOf[TableIdentifierTableTypeExtractor], + Seq(TEMP_VIEW)) + TableCommandSpec( + cmd, + Seq(TableDesc( + "tableIdentifier", + classOf[TableIdentifierTableExtractor], + tableTypeDesc = Some(tableTypeDesc))), + DROPTABLE) + } + + val TruncateHoodieTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.TruncateHoodieTableCommand" + val columnDesc = ColumnDesc("partitionSpec", classOf[PartitionOptionColumnExtractor]) + val tableDesc = + TableDesc( + "tableIdentifier", + classOf[TableIdentifierTableExtractor], + columnDesc = Some(columnDesc)) + TableCommandSpec(cmd, Seq(tableDesc), TRUNCATETABLE) + } + val data: Array[TableCommandSpec] = Array( AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, @@ -109,5 +136,7 @@ object HudiCommands { Spark31AlterTableCommand, CreateHoodieTableCommand, CreateHoodieTableAsSelectCommand, - CreateHoodieTableLikeCommand) + CreateHoodieTableLikeCommand, + DropHoodieTableCommand, + TruncateHoodieTableCommand) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala index ac1e357a22f..fc3ebf4fe74 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala @@ -180,7 +180,10 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { } test("CreateHoodieTableLikeCommand") { - withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + withCleanTmpResources(Seq( + (s"$namespace1.$table1", "table"), + (s"$namespace1.$table2", "table"), + (namespace1, "database"))) { doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) doAs( admin, @@ -195,15 +198,70 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |) |PARTITIONED BY(city) |""".stripMargin)) - 
interceptContains[AccessControlException]( + + val createTableSql = + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table2 + |LIKE $namespace1.$table1 + |USING HUDI + |""".stripMargin + interceptContains[AccessControlException] { doAs( someone, sql( - s""" - |CREATE TABLE IF NOT EXISTS $namespace1.$table2 - |LIKE $namespace1.$table1 - |USING HUDI - |""".stripMargin)))(s"does not have [select] privilege on [$namespace1/$table1]") + createTableSql)) + }(s"does not have [select] privilege on [$namespace1/$table1]") + doAs(admin, sql(createTableSql)) + } + } + + test("DropHoodieTableCommand") { + withCleanTmpResources(Seq((namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + + val dropTableSql = s"DROP TABLE IF EXISTS $namespace1.$table1" + interceptContains[AccessControlException] { + doAs(someone, sql(dropTableSql)) + }(s"does not have [drop] privilege on [$namespace1/$table1]") + doAs(admin, sql(dropTableSql)) + } + } + + test("TruncateHoodieTableCommand") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'cow', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + + val truncateTableSql = s"TRUNCATE TABLE $namespace1.$table1" + interceptContains[AccessControlException] { + doAs(someone, sql(truncateTableSql)) + }(s"does not have [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(truncateTableSql)) } } } From 8d2c8d1009791e4d39f858450697a797a6bfbfa7 Mon Sep 17 00:00:00 2001 From: zml1206 Date: Wed, 18 Oct 2023 16:17:59 +0800 Subject: [PATCH 24/33] [KYUUBI #5449] Bump Delta Lake 3.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### _Why are the changes needed?_ close [#5449](https://github.com/apache/kyuubi/issues/5449). Unlike the initial preview release, Delta Spark 3.0.0 is now built on top of Apache Spark™ 3.5. Delta Spark maven artifact has been renamed from delta-core to delta-spark. https://github.com/delta-io/delta/releases/tag/v3.0.0 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5450 from zml1206/5449. 
Closes #5449 a7969ed6a [zml1206] bump Delta Lake 3.0.0 Authored-by: zml1206 Signed-off-by: Cheng Pan --- externals/kyuubi-spark-sql-engine/pom.xml | 2 +- kyuubi-server/pom.xml | 2 +- pom.xml | 10 ++++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/externals/kyuubi-spark-sql-engine/pom.xml b/externals/kyuubi-spark-sql-engine/pom.xml index c453bd28382..555e41a44b6 100644 --- a/externals/kyuubi-spark-sql-engine/pom.xml +++ b/externals/kyuubi-spark-sql-engine/pom.xml @@ -148,7 +148,7 @@ io.delta - delta-core_${scala.binary.version} + ${delta.artifact}_${scala.binary.version} test diff --git a/kyuubi-server/pom.xml b/kyuubi-server/pom.xml index a8b133d2792..886303a1153 100644 --- a/kyuubi-server/pom.xml +++ b/kyuubi-server/pom.xml @@ -456,7 +456,7 @@ io.delta - delta-core_${scala.binary.version} + ${delta.artifact}_${scala.binary.version} test diff --git a/pom.xml b/pom.xml index 65498ba4954..00ee354a19e 100644 --- a/pom.xml +++ b/pom.xml @@ -133,6 +133,7 @@ 2.6 3.13.0 0.7.3 + delta-core 2.4.0 2.4.4 0.9.3 @@ -1192,7 +1193,7 @@ io.delta - delta-core_${scala.binary.version} + ${delta.artifact}_${scala.binary.version} ${delta.version} 1.3.1 @@ -2216,6 +2218,7 @@ 3.2.4 3.2 + delta-core 2.0.2 spark-${spark.version}-bin-hadoop3.2${spark.archive.scala.suffix}.tgz org.scalatest.tags.Slow @@ -2231,6 +2234,7 @@ 2.3.0 + delta-core 3.3.3 3.3 org.scalatest.tags.Slow @@ -2244,6 +2248,7 @@ extensions/spark/kyuubi-spark-connector-hive + delta-core 2.4.0 3.4.1 3.4 @@ -2257,7 +2262,8 @@ extensions/spark/kyuubi-extension-spark-3-5 - 2.4.0 + delta-spark + 3.0.0 3.4 3.5.0 From facfe57a4957b6e89bd7d2004924d7f9ff147b08 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 18 Oct 2023 18:02:29 +0800 Subject: [PATCH 25/33] [KYUUBI #5452][AUTHZ] Support Compaction table commands for Hudi ### _Why are the changes needed?_ To close #5452 Support Compaction table/path related command. The SQL grammar is https://github.com/apache/hudi/blob/release-0.14.0/hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4 - CompactionHoodieTableCommand :https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodieTableCommand.scala - CompactionShowHoodiePathCommand: https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodiePathCommand.scala - CompactionShowHoodieTableCommand: https://github.com/apache/hudi/blob/master/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionShowHoodieTableCommand.scala ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5454 from AngersZhuuuu/KYUUBI-5452. 
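For reference, a small sketch of the statements this adds coverage for, assuming an already configured SparkSession and a hypothetical MOR table `db.hudi_tbl` (both names are illustrative):
```
import org.apache.spark.sql.SparkSession

// Sketch only. RUN COMPACTION is planned as CompactionHoodieTableCommand and
// SHOW COMPACTION as CompactionShowHoodieTableCommand; the suite in this patch
// expects both to fail for an unprivileged user with a missing [select] privilege.
def compactAndInspect(spark: SparkSession): Unit = {
  spark.sql("RUN COMPACTION ON db.hudi_tbl")
  spark.sql("SHOW COMPACTION ON db.hudi_tbl").show()
}
```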
Closes #5452 e42200f7e [Angerszhuuuu] follow comment 4d5139e9a [Angerszhuuuu] Update HudiCatalogRangerSparkExtensionSuite.scala 0e7cb924b [Angerszhuuuu] follow comment e14dc4129 [Angerszhuuuu] [KYUUBI #5452][AUTHZ] Support Compaction table/path related command Authored-by: Angerszhuuuu Signed-off-by: Cheng Pan --- .../main/resources/table_command_spec.json | 37 +++++++++++++++++++ .../plugin/spark/authz/gen/HudiCommands.scala | 20 ++++++++-- ...HudiCatalogRangerSparkExtensionSuite.scala | 31 ++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index abf4c314c0e..21d647f5d95 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -1513,6 +1513,43 @@ } ], "opType" : "ALTERTABLE_PROPERTIES", "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CompactionHoodieTableCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false + }, { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : true, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "CREATETABLE", + "queryDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CompactionShowHoodieTableCommand", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : true, + "setCurrentDatabaseIfMissing" : false + } ], + "opType" : "SHOW_TBLPROPERTIES", + "queryDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand", "tableDescs" : [ { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala index a5f65c3d014..72daa89e2a3 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala @@ -127,16 +127,30 @@ object HudiCommands { TableCommandSpec(cmd, Seq(tableDesc), TRUNCATETABLE) } + val CompactionHoodieTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CompactionHoodieTableCommand" + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + TableCommandSpec(cmd, Seq(tableDesc, tableDesc.copy(isInput = true)), CREATETABLE) + } + + val CompactionShowHoodieTableCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CompactionShowHoodieTableCommand" + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], isInput = true) + TableCommandSpec(cmd, Seq(tableDesc), SHOW_TBLPROPERTIES) + } + val data: Array[TableCommandSpec] = Array( AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, AlterHoodieTableDropPartitionCommand, AlterHoodieTableRenameCommand, 
AlterTableCommand, - Spark31AlterTableCommand, - CreateHoodieTableCommand, CreateHoodieTableAsSelectCommand, + CreateHoodieTableCommand, CreateHoodieTableLikeCommand, + CompactionHoodieTableCommand, + CompactionShowHoodieTableCommand, DropHoodieTableCommand, - TruncateHoodieTableCommand) + TruncateHoodieTableCommand, + Spark31AlterTableCommand) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala index fc3ebf4fe74..48af6bf9fff 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala @@ -264,4 +264,35 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { doAs(admin, sql(truncateTableSql)) } } + + test("CompactionHoodieTableCommand / CompactionShowHoodieTableCommand") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (namespace1, "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1(id int, name string, city string) + |USING HUDI + |OPTIONS ( + | type = 'mor', + | primaryKey = 'id', + | 'hoodie.datasource.hive_sync.enable' = 'false' + |) + |PARTITIONED BY(city) + |""".stripMargin)) + + val compactionTable = s"RUN COMPACTION ON $namespace1.$table1" + interceptContains[AccessControlException] { + doAs(someone, sql(compactionTable)) + }(s"does not have [select] privilege on [$namespace1/$table1]") + doAs(admin, sql(compactionTable)) + + val showCompactionTable = s"SHOW COMPACTION ON $namespace1.$table1" + interceptContains[AccessControlException] { + doAs(someone, sql(showCompactionTable)) + }(s"does not have [select] privilege on [$namespace1/$table1]") + doAs(admin, sql(showCompactionTable)) + } + } } From aab8144e0b88e9c7ad7a37e44a1b3d426bfe6b2e Mon Sep 17 00:00:00 2001 From: minyk Date: Wed, 18 Oct 2023 18:52:31 +0800 Subject: [PATCH 26/33] [KYUUBI #5451] Ignore NoSuchFileException during OperationLog.close() ### _Why are the changes needed?_ OperationLog.close() try to close BufferedReader, but it says file is missing(NoSuchFileException). This happens a lot after restarting kyuubi-server and PV is not set for `kyuubi.operation.log.dir.root`. 
Logs are like this: ``` 2023-10-18 04:16:34.296 ERROR KyuubiTBinaryFrontendHandler-Pool: Thread-113 org.apache.kyuubi.operation.LaunchEngine: Failed to remove corresponding log file of operation: /opt/kyuubi/work/server_operation_logs/26434238-5615-456a-aa7b-fad4dde8da27/43a9bfd4-6795-422c-8031-5409d7fe3732 java.io.IOException: Failed to remove corresponding log file of operation: /opt/kyuubi/work/server_operation_logs/26434238-5615-456a-aa7b-fad4dde8da27/43a9bfd4-6795-422c-8031-5409d7fe3732 at org.apache.kyuubi.operation.log.OperationLog.trySafely(OperationLog.scala:230) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.log.OperationLog.close(OperationLog.scala:201) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.KyuubiOperation.$anonfun$close$2(KyuubiOperation.scala:119) ~[kyuubi-server_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.KyuubiOperation.$anonfun$close$2$adapted(KyuubiOperation.scala:119) ~[kyuubi-server_2.12-1.6.1-incubating.jar:1.6.1-incubating] at scala.Option.foreach(Option.scala:407) ~[scala-library-2.12.15.jar:?] at org.apache.kyuubi.operation.KyuubiOperation.liftedTree2$1(KyuubiOperation.scala:119) ~[kyuubi-server_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.KyuubiOperation.close(KyuubiOperation.scala:116) ~[kyuubi-server_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.OperationManager.closeOperation(OperationManager.scala:126) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.session.AbstractSession.$anonfun$close$2(AbstractSession.scala:89) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at java.lang.Iterable.forEach(Iterable.java:75) ~[?:1.8.0_342] ... Caused by: java.nio.file.NoSuchFileException: /opt/kyuubi/work/server_operation_logs/26434238-5615-456a-aa7b-fad4dde8da27/43a9bfd4-6795-422c-8031-5409d7fe3732 at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) ~[?:1.8.0_342] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) ~[?:1.8.0_342] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) ~[?:1.8.0_342] at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214) ~[?:1.8.0_342] at java.nio.file.Files.newByteChannel(Files.java:361) ~[?:1.8.0_342] at java.nio.file.Files.newByteChannel(Files.java:407) ~[?:1.8.0_342] at java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384) ~[?:1.8.0_342] at java.nio.file.Files.newInputStream(Files.java:152) ~[?:1.8.0_342] at java.nio.file.Files.newBufferedReader(Files.java:2784) ~[?:1.8.0_342] at org.apache.kyuubi.operation.log.OperationLog.reader$lzycompute(OperationLog.scala:89) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.log.OperationLog.reader(OperationLog.scala:89) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.log.OperationLog.$anonfun$close$1(OperationLog.scala:201) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] at org.apache.kyuubi.operation.log.OperationLog.trySafely(OperationLog.scala:221) ~[kyuubi-common_2.12-1.6.1-incubating.jar:1.6.1-incubating] ... 34 more ``` `OperationLog.trySafely()`is called by `OperationLog.close()` only, so we can ignore this exception. 
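In short, a log file that has already disappeared during close (typical after a server restart without a persistent volume) is now ignored, while other I/O failures keep their existing handling. A rough sketch of the pattern, with a hypothetical helper standing in for the work done inside `OperationLog.trySafely` (the real change is in the diff below):
```
import java.io.IOException
import java.nio.file.NoSuchFileException

// Sketch only: `cleanup` stands in for closing the reader and deleting the operation log file.
def trySafelySketch(cleanup: => Unit): Unit = {
  try cleanup
  catch {
    case _: NoSuchFileException =>
      // the operation log file is already gone, nothing left to clean up
    case e: IOException =>
      // keep the original behavior for real I/O failures; the real code avoids logging
      // here to prevent a deadlock, as noted in the diff below
      throw new IOException("Failed to remove corresponding log file of operation", e)
  }
}
```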
This closes #5451 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [X] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5453 from minyk/kyuubi-5451. Closes #5451 b95731499 [minyk] ignore NoSuchFileException during OperationLog.close() Authored-by: minyk Signed-off-by: Kent Yao --- .../scala/org/apache/kyuubi/operation/log/OperationLog.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala index 7ee803cb39e..2e133df28b8 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.operation.log import java.io.{BufferedReader, IOException} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Path, Paths} +import java.nio.file.{Files, NoSuchFileException, Path, Paths} import java.util.{ArrayList => JArrayList, List => JList} import scala.collection.JavaConverters._ @@ -262,6 +262,7 @@ class OperationLog(path: Path) { try { f } catch { + case _: NoSuchFileException => case e: IOException => // Printing log here may cause a deadlock. The lock order of OperationLog.write // is RootLogger -> LogDivertAppender -> OperationLog. If printing log here, the From c5fed9f2e1368e7f4046c4f1f71a9e19ea8611c0 Mon Sep 17 00:00:00 2001 From: labbomb <739955946@qq.com> Date: Wed, 18 Oct 2023 20:26:45 +0800 Subject: [PATCH 27/33] [KYUUBI #5463] [UI] Adjust the file directory structure ### _Why are the changes needed?_ 1. Some of the code was duplicated, so it was removed. 2. The current front-end directory is not very reasonable, and some common methods and layout structures are placed under views, which can look strange, so I adjusted the directory structure to make it reasonable ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [x] Add screenshots for manual tests if appropriate image - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5463 from labbomb/dev. 
Closes #5463 a1ca90a8b [labbomb] fix: fix test error b1a73c090 [labbomb] Merge branch 'dev' of github.com:labbomb/kyuubi into dev f91b7d08c [labbomb] feat: adjust the file directory structure Authored-by: labbomb <739955946@qq.com> Signed-off-by: Cheng Pan --- .../{ => assets}/styles/element/index.scss | 0 .../web-ui/src/{ => assets}/styles/index.scss | 0 .../web-ui/src/components/menu/index.vue | 2 +- .../layout/components/aside/index.vue | 0 .../layout/components/aside/types.ts | 0 .../layout/components/header/index.vue | 0 .../layout/components/header/types.ts | 0 .../layout/components/header/use-locales.ts | 0 .../web-ui/src/{views => }/layout/index.vue | 0 kyuubi-server/web-ui/src/main.ts | 4 +-- .../web-ui/src/pinia/modules/layout.ts | 32 ------------------- kyuubi-server/web-ui/src/router/index.ts | 2 +- .../src/test/unit/views/layout/aside.spec.ts | 2 +- .../src/test/unit/views/layout/header.spec.ts | 2 +- .../src/test/unit/views/layout/layout.spec.ts | 2 +- .../src/{views/common => utils}/use-table.ts | 0 .../web-ui/src/views/detail/session/index.vue | 2 +- .../src/views/management/engine/index.vue | 2 +- .../src/views/management/operation/index.vue | 2 +- .../src/views/management/server/index.vue | 2 +- .../src/views/management/session/index.vue | 2 +- 21 files changed, 12 insertions(+), 44 deletions(-) rename kyuubi-server/web-ui/src/{ => assets}/styles/element/index.scss (100%) rename kyuubi-server/web-ui/src/{ => assets}/styles/index.scss (100%) rename kyuubi-server/web-ui/src/{views => }/layout/components/aside/index.vue (100%) rename kyuubi-server/web-ui/src/{views => }/layout/components/aside/types.ts (100%) rename kyuubi-server/web-ui/src/{views => }/layout/components/header/index.vue (100%) rename kyuubi-server/web-ui/src/{views => }/layout/components/header/types.ts (100%) rename kyuubi-server/web-ui/src/{views => }/layout/components/header/use-locales.ts (100%) rename kyuubi-server/web-ui/src/{views => }/layout/index.vue (100%) delete mode 100644 kyuubi-server/web-ui/src/pinia/modules/layout.ts rename kyuubi-server/web-ui/src/{views/common => utils}/use-table.ts (100%) diff --git a/kyuubi-server/web-ui/src/styles/element/index.scss b/kyuubi-server/web-ui/src/assets/styles/element/index.scss similarity index 100% rename from kyuubi-server/web-ui/src/styles/element/index.scss rename to kyuubi-server/web-ui/src/assets/styles/element/index.scss diff --git a/kyuubi-server/web-ui/src/styles/index.scss b/kyuubi-server/web-ui/src/assets/styles/index.scss similarity index 100% rename from kyuubi-server/web-ui/src/styles/index.scss rename to kyuubi-server/web-ui/src/assets/styles/index.scss diff --git a/kyuubi-server/web-ui/src/components/menu/index.vue b/kyuubi-server/web-ui/src/components/menu/index.vue index d6d4d1b56f1..41a64127368 100644 --- a/kyuubi-server/web-ui/src/components/menu/index.vue +++ b/kyuubi-server/web-ui/src/components/menu/index.vue @@ -22,7 +22,7 @@ :collapse="isCollapse" :default-active="activePath" :router="true"> -