From f23b6de6b3751fc2592eb834107b7381cf7ce130 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 6 Nov 2023 14:40:16 +0800 Subject: [PATCH] [KYUUBI #5579][AUTHZ] Support LogicalRelation don't have CatalogTable but have HadoopFsRelation ### _Why are the changes needed?_ To close #5579 Support LogicalRelation don't have CatalogTable but have HadoopFsRelation ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5581 from AngersZhuuuu/KYUUBI-5579. Closes #5579 0298db505 [Angerszhuuuu] Update uriExtractors.scala 05a9480bf [Angerszhuuuu] update 5acc91928 [Angerszhuuuu] Update PrivilegesBuilder.scala 77cc7f914 [Angerszhuuuu] update 47b79c7be [Angerszhuuuu] Update RangerSparkExtensionSuite.scala 96f2006d4 [Angerszhuuuu] Merge branch 'master' into KYUUBI-5579 651f3f6f3 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala 8b5a650d5 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala c37d655d2 [Angerszhuuuu] Merge branch 'master' into KYUUBI-5579 a71f3a6c7 [Angerszhuuuu] update d4bb5b43d [Angerszhuuuu] Update RangerSparkExtensionSuite.scala 6f634f404 [Angerszhuuuu] Merge branch 'master' into KYUUBI-5579 33282e21d [Angerszhuuuu] [KYUUBI #5579][AUTHZ] Support LogicalRelation don't have CatalogTable but have HadoopFsRelation Authored-by: Angerszhuuuu Signed-off-by: Cheng Pan --- ...uubi.plugin.spark.authz.serde.URIExtractor | 1 + .../src/main/resources/scan_command_spec.json | 29 ++++++++++----- .../spark/authz/PrivilegesBuilder.scala | 10 +++++- .../spark/authz/serde/CommandSpec.scala | 15 +++++++- .../spark/authz/serde/uriExtractors.scala | 10 ++++++ .../kyuubi/plugin/spark/authz/gen/Scans.scala | 3 +- 
.../ranger/RangerSparkExtensionSuite.scala | 36 +++++++++++++++++++ 7 files changed, 93 insertions(+), 11 deletions(-) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor index f0038e75b5f..f5b1c6e6db3 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor @@ -16,5 +16,6 @@ # org.apache.kyuubi.plugin.spark.authz.serde.CatalogStorageFormatURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.BaseRelationFileIndexURIExtractor org.apache.kyuubi.plugin.spark.authz.serde.OptionsUriExtractor org.apache.kyuubi.plugin.spark.authz.serde.StringURIExtractor diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json index 40a0d81c24f..1fbcc961259 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json @@ -5,7 +5,8 @@ "fieldExtractor" : "CatalogTableTableExtractor", "catalogDesc" : null } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.catalog.HiveTableRelation", "scanDescs" : [ { @@ -13,7 +14,8 @@ "fieldExtractor" : "CatalogTableTableExtractor", "catalogDesc" : null } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.LogicalRelation", "scanDescs" : [ { @@ -21,7 +23,13 @@ "fieldExtractor" : "CatalogTableOptionTableExtractor", "catalogDesc" : null } ], - 
"functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "relation", + "fieldExtractor" : "BaseRelationFileIndexURIExtractor", + "actionTypeDesc" : null, + "isInput" : false + } ] }, { "classname" : "org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation", "scanDescs" : [ { @@ -29,7 +37,8 @@ "fieldExtractor" : "DataSourceV2RelationTableExtractor", "catalogDesc" : null } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveGenericUDF", "scanDescs" : [ ], @@ -43,7 +52,8 @@ "skipTypes" : [ "TEMP", "SYSTEM" ] }, "isInput" : true - } ] + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveGenericUDTF", "scanDescs" : [ ], @@ -57,7 +67,8 @@ "skipTypes" : [ "TEMP", "SYSTEM" ] }, "isInput" : true - } ] + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveSimpleUDF", "scanDescs" : [ ], @@ -71,7 +82,8 @@ "skipTypes" : [ "TEMP", "SYSTEM" ] }, "isInput" : true - } ] + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveUDAFFunction", "scanDescs" : [ ], @@ -85,5 +97,6 @@ "skipTypes" : [ "TEMP", "SYSTEM" ] }, "isInput" : true - } ] + } ], + "uriDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala index 3d872690f65..b708b7f8b45 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala @@ -113,7 +113,15 @@ object PrivilegesBuilder { buildQuery(a.child, privilegeObjects, projectionList, cols, spark) case scan if isKnownScan(scan) && scan.resolved => - getScanSpec(scan).tables(scan, 
spark).foreach(mergeProjection(_, scan)) + val tables = getScanSpec(scan).tables(scan, spark) + // If the scan is table-based, we check privileges on the table we found + // otherwise, we check privileges on the uri we found + if (tables.nonEmpty) { + tables.foreach(mergeProjection(_, scan)) + } else { + getScanSpec(scan).uris(scan).foreach( + privilegeObjects += PrivilegeObject(_, PrivilegeObjectActionType.OTHER)) + } case u if u.nodeName == "UnresolvedRelation" => val parts = invokeAs[String](u, "tableName").split("\\.") diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala index 7b306551cc3..14f3719b8a8 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala @@ -101,7 +101,8 @@ case class TableCommandSpec( case class ScanSpec( classname: String, scanDescs: Seq[ScanDesc], - functionDescs: Seq[FunctionDesc] = Seq.empty) extends CommandSpec { + functionDescs: Seq[FunctionDesc] = Seq.empty, + uriDescs: Seq[UriDesc] = Seq.empty) extends CommandSpec { override def opType: String = OperationType.QUERY.toString def tables: (LogicalPlan, SparkSession) => Seq[Table] = (plan, spark) => { scanDescs.flatMap { td => @@ -115,6 +116,18 @@ case class ScanSpec( } } + def uris: LogicalPlan => Seq[Uri] = plan => { + uriDescs.flatMap { ud => + try { + ud.extract(plan) + } catch { + case e: Exception => + LOG.debug(ud.error(plan, e)) + None + } + } + } + def functions: (Expression) => Seq[Function] = (expr) => { functionDescs.flatMap { fd => try { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala 
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala index 418c5a0a084..3283c501912 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala @@ -18,6 +18,7 @@ package org.apache.kyuubi.plugin.spark.authz.serde import org.apache.spark.sql.catalyst.catalog.CatalogStorageFormat +import org.apache.spark.sql.execution.datasources.HadoopFsRelation trait URIExtractor extends (AnyRef => Seq[Uri]) with Extractor @@ -47,3 +48,12 @@ class OptionsUriExtractor extends URIExtractor { v1.asInstanceOf[Map[String, String]].get("path").map(Uri).toSeq } } + +class BaseRelationFileIndexURIExtractor extends URIExtractor { + override def apply(v1: AnyRef): Seq[Uri] = { + v1 match { + case h: HadoopFsRelation => h.location.rootPaths.map(_.toString).map(Uri) + case _ => Nil + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala index 7771a2dd227..ed17dc5dc43 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala @@ -37,7 +37,8 @@ object Scans extends CommandSpecs[ScanSpec] { ScanDesc( "catalogTable", classOf[CatalogTableOptionTableExtractor]) - ScanSpec(r, Seq(tableDesc)) + val uriDesc = UriDesc("relation", classOf[BaseRelationFileIndexURIExtractor]) + ScanSpec(r, Seq(tableDesc), uriDescs = Seq(uriDesc)) } val DataSourceV2Relation = { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala 
b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index 3bb8379bc7f..e8e4486ac3e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -1097,4 +1097,40 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { } } } + + test("HadoopFsRelation") { + val db1 = defaultDb + val table1 = "table1" + withTempDir { path => + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + doAs( + admin, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |USING parquet + |SELECT * FROM $db1.$table1""".stripMargin)) + + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |USING parquet + |SELECT * FROM $db1.$table1""".stripMargin)))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[update] privilege on [[$path, $path/]]") + + doAs(admin, sql(s"SELECT * FROM parquet.`$path`".stripMargin).explain(true)) + interceptContains[AccessControlException]( + doAs(someone, sql(s"SELECT * FROM parquet.`$path`".stripMargin).explain(true)))( + s"does not have [select] privilege on " + + s"[[file:$path, file:$path/]]") + } + } + } + } }