Skip to content

Commit

Permalink
[SPARK-30183][SQL] Disallow to specify reserved properties in CREATE/…
Browse files Browse the repository at this point in the history
…ALTER NAMESPACE syntax

### What changes were proposed in this pull request?
Currently, COMMENT and LOCATION are reserved properties for Datasource v2 namespaces. They can be set either via their specific clauses or via generic properties, and the values specified in clauses take precedence over those in properties. Since they are reserved, they should not be settable directly as properties; they should be used in the COMMENT/LOCATION clauses ONLY.

### Why are the changes needed?
Make the reserved properties actually behave as reserved, instead of being accepted as ordinary user-defined properties.

### Does this PR introduce any user-facing change?
Yes: 'location' and 'comment' are no longer allowed to be used as keys in db properties.

### How was this patch tested?
UNIT tests.

Closes #26806 from yaooqinn/SPARK-30183.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
yaooqinn authored and cloud-fan committed Jan 9, 2020
1 parent 92a0877 commit c373123
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
Expand Up @@ -264,6 +264,8 @@ license: |

- Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`.

- Since Spark 3.0, `location` and `comment` become reserved database properties. Commands will fail if we specify reserved properties in `CREATE DATABASE ... WITH DBPROPERTIES` and `ALTER DATABASE ... SET DBPROPERTIES`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`; in this case, these properties will be silently removed, e.g. `SET DBPROPERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROPERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`.

## Upgrading from Spark SQL 2.4 to 2.4.1

- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was
Expand Down
Expand Up @@ -2520,6 +2520,26 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
}

/**
 * Validates user-supplied namespace properties against the reserved keys
 * (`location`, `comment`) declared by `SupportsNamespaces`.
 *
 * Default behavior: raise a [[ParseException]] if any reserved key appears,
 * directing the user to the dedicated LOCATION / COMMENT clause.
 * Legacy behavior (`spark.sql.legacy.property.nonReserved` = true): silently
 * drop the reserved keys and keep the rest.
 *
 * @param properties the raw property map parsed from the statement
 * @param ctx parser context, used to attach the error's origin
 * @return the property map, with reserved keys removed in legacy mode
 */
private def cleanNamespaceProperties(
    properties: Map[String, String],
    ctx: ParserRuleContext): Map[String, String] = withOrigin(ctx) {
  import SupportsNamespaces._
  if (conf.getConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED)) {
    // Legacy mode: reserved keys are discarded without error.
    properties -- RESERVED_PROPERTIES.asScala
  } else {
    // Reject on the first reserved key encountered, in map iteration order.
    properties.keysIterator.foreach {
      case PROP_LOCATION =>
        throw new ParseException(s"$PROP_LOCATION is a reserved namespace property, please use" +
          s" the LOCATION clause to specify it.", ctx)
      case PROP_COMMENT =>
        throw new ParseException(s"$PROP_COMMENT is a reserved namespace property, please use" +
          s" the COMMENT clause to specify it.", ctx)
      case _ => ()
    }
    properties
  }
}

/**
* Create a [[CreateNamespaceStatement]] command.
*
Expand All @@ -2535,6 +2555,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
* }}}
*/
override def visitCreateNamespace(ctx: CreateNamespaceContext): LogicalPlan = withOrigin(ctx) {
import SupportsNamespaces._
checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx)
checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
checkDuplicateClauses(ctx.PROPERTIES, "WITH PROPERTIES", ctx)
Expand All @@ -2548,12 +2569,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
.map(visitPropertyKeyValues)
.getOrElse(Map.empty)

properties = cleanNamespaceProperties(properties, ctx)

visitCommentSpecList(ctx.commentSpec()).foreach {
properties += SupportsNamespaces.PROP_COMMENT -> _
properties += PROP_COMMENT -> _
}

visitLocationSpecList(ctx.locationSpec()).foreach {
properties += SupportsNamespaces.PROP_LOCATION -> _
properties += PROP_LOCATION -> _
}

CreateNamespaceStatement(
Expand Down Expand Up @@ -2588,9 +2611,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
*/
/**
 * Builds an [[AlterNamespaceSetProperties]] plan for
 * `ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET (DBPROPERTIES|PROPERTIES)`.
 * Reserved keys are validated/stripped by `cleanNamespaceProperties` before
 * the plan node is constructed.
 */
override def visitSetNamespaceProperties(ctx: SetNamespacePropertiesContext): LogicalPlan = {
  withOrigin(ctx) {
    // Parse and validate the property list first, so reserved-key errors
    // surface before the namespace identifier is resolved.
    val cleanedProps = cleanNamespaceProperties(visitPropertyKeyValues(ctx.tablePropertyList), ctx)
    val namespace = UnresolvedNamespace(visitMultipartIdentifier(ctx.multipartIdentifier))
    AlterNamespaceSetProperties(namespace, cleanedProps)
  }
}

Expand Down
Expand Up @@ -2136,6 +2136,15 @@ object SQLConf {
"defined by `from` and `to`.")
.booleanConf
.createWithDefault(false)

// Escape hatch restoring the pre-3.0 behavior where reserved keys (e.g. 'location',
// 'comment') were accepted in CREATE/ALTER ... PROPERTIES syntax instead of raising
// a ParseException. Even when enabled, the reserved keys are silently dropped rather
// than applied. Internal conf; defaults to false (reserved keys rejected).
val LEGACY_PROPERTY_NON_RESERVED =
buildConf("spark.sql.legacy.property.nonReserved")
.internal()
.doc("When true, all database and table properties are not reserved and available for " +
"create/alter syntaxes. But please be aware that the reserved properties will be " +
"silently removed.")
.booleanConf
.createWithDefault(false)
}

/**
Expand Down
Expand Up @@ -22,6 +22,7 @@ import scala.collection.JavaConverters._
import org.apache.spark.SparkException
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.connector.catalog._
import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
Expand Down Expand Up @@ -864,6 +865,31 @@ class DataSourceV2SQLSuite
}
}

test("CreateNameSpace: reserved properties") {
  // Default mode: every reserved key in DBPROPERTIES must be rejected at parse time.
  withSQLConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED.key -> "false") {
    SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { reserved =>
      val parseErr = intercept[ParseException] {
        sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$reserved'='dummyVal')")
      }
      assert(parseErr.getMessage.contains(s"$reserved is a reserved namespace property"))
    }
  }
  // Legacy mode: the statement succeeds but the reserved key is silently dropped.
  withSQLConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED.key -> "true") {
    SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { reserved =>
      withNamespace("testcat.reservedTest") {
        sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$reserved'='foo')")
        // The dropped key must not show up in the DESC output...
        val shownProps = sql("DESC NAMESPACE EXTENDED testcat.reservedTest")
          .toDF("k", "v")
          .where("k='Properties'")
        assert(shownProps.isEmpty, s"$reserved is a reserved namespace property and ignored")
        // ...nor leak into the catalog's namespace metadata.
        val metadata =
          catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest"))
        assert(metadata.get(reserved) === null, "reserved properties should not have side effects")
      }
    }
  }
}

test("DropNamespace: basic tests") {
// Session catalog is used.
sql("CREATE NAMESPACE ns")
Expand Down Expand Up @@ -961,6 +987,35 @@ class DataSourceV2SQLSuite
}
}

test("AlterNamespaceSetProperties: reserved properties") {
  // Default mode: SET PROPERTIES with a reserved key must fail at parse time.
  withSQLConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED.key -> "false") {
    SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { reserved =>
      withNamespace("testcat.reservedTest") {
        sql("CREATE NAMESPACE testcat.reservedTest")
        val parseErr = intercept[ParseException] {
          sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$reserved'='dummyVal')")
        }
        assert(parseErr.getMessage.contains(s"$reserved is a reserved namespace property"))
      }
    }
  }
  // Legacy mode: the statement succeeds but the reserved key is silently dropped.
  withSQLConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED.key -> "true") {
    SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { reserved =>
      withNamespace("testcat.reservedTest") {
        sql("CREATE NAMESPACE testcat.reservedTest")
        sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$reserved'='foo')")
        // The dropped key must not show up in the DESC output...
        val shownProps = sql("DESC NAMESPACE EXTENDED testcat.reservedTest")
          .toDF("k", "v")
          .where("k='Properties'")
        assert(shownProps.isEmpty, s"$reserved is a reserved namespace property and ignored")
        // ...nor leak into the catalog's namespace metadata.
        val metadata =
          catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest"))
        assert(metadata.get(reserved) === null, "reserved properties should not have side effects")
      }
    }
  }
}

test("AlterNamespaceSetLocation using v2 catalog") {
withNamespace("testcat.ns1.ns2") {
sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1.ns2 COMMENT " +
Expand Down

0 comments on commit c373123

Please sign in to comment.