Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-37150][SQL] Migrate DESCRIBE NAMESPACE to use V2 command by default #34429

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ShowNamespaces, ShowTables}
import org.apache.spark.sql.catalyst.plans.logical.{DescribeNamespace, LogicalPlan, ShowNamespaces, ShowTables}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND
import org.apache.spark.sql.internal.SQLConf
Expand All @@ -39,6 +39,10 @@ object KeepLegacyOutputs extends Rule[LogicalPlan] {
case s: ShowNamespaces =>
assert(s.output.length == 1)
s.copy(output = Seq(s.output.head.withName("databaseName")))
case d: DescribeNamespace =>
assert(d.output.length == 2)
d.copy(output = Seq(d.output.head.withName("database_description_item"),
d.output.last.withName("database_description_value")))
}
}
}
Expand Down
Expand Up @@ -105,15 +105,8 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
case UnsetViewProperties(ResolvedView(ident, _), keys, ifExists) =>
AlterTableUnsetPropertiesCommand(ident.asTableIdentifier, keys, ifExists, isView = true)

case DescribeNamespace(DatabaseInSessionCatalog(db), extended, output) =>
val newOutput = if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
assert(output.length == 2)
Seq(output.head.withName("database_description_item"),
output.last.withName("database_description_value"))
} else {
output
}
DescribeDatabaseCommand(db, extended, newOutput)
case DescribeNamespace(DatabaseInSessionCatalog(db), extended, output) if conf.useV1Command =>
DescribeDatabaseCommand(db, extended, output)

case SetNamespaceProperties(DatabaseInSessionCatalog(db), properties) =>
AlterDatabasePropertiesCommand(db, properties)
Expand Down
Expand Up @@ -44,10 +44,13 @@ case class DescribeNamespaceExec(
}

if (isExtended) {
val properties = metadata.asScala -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES
if (properties.nonEmpty) {
rows += toCatalystRow("Properties", properties.toSeq.mkString("(", ",", ")"))
val properties = metadata.asScala.toMap -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding toMap similar to

.

The Scala Map implementation uses a list underneath up to 4 elements and maintains it in the order it was inserted. So this make tests such as

Row("Properties", "((a,a), (b,b), (c,c), (d,d))") :: Nil)
pass.

Maybe, just make the tests more robust?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just changed the command to sort the properties by keys (similar to show tblproperties).

val propertiesStr = if (properties.isEmpty) {
""
} else {
properties.toSeq.mkString("(", ", ", ")")
}
rows += toCatalystRow("Properties", propertiesStr)
Copy link
Contributor Author

@imback82 imback82 Oct 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cloud-fan These are the differences between v1 and v2 command:

  1. For extended mode, v1 command prints the "Properties" row with an empty string whereas v2 command doesn't print the row if there are no properties.
  2. v1 command puts extra space after , among properties: e.g., ((a,b), (c,d)) in v1 vs. ((a,b),(c,d)) in v2.
  3. v1 command uses Database Name vs. Namespace Name in v2.

1) and 2) are easy to resolve (e.g., just follow v1 behavior), but I wasn't sure the best way to address 3). Do we need to unify this property name?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's OK to make this behavior change (database -> namespace). People can still fallback to v1 command if needed.

The tests need to be a bit more complicated though, to allow different results between v1 and v2 commands.

}
rows.toSeq
}
Expand Down
Expand Up @@ -1079,7 +1079,8 @@ class DataSourceV2SQLSuite
assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest")
.toDF("k", "v")
.where("k='Properties'")
.isEmpty, s"$key is a reserved namespace property and ignored")
.where("v=''")
.count == 1, s"$key is a reserved namespace property and ignored")
val meta =
catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest"))
assert(meta.get(key) == null || !meta.get(key).contains("foo"),
Expand Down Expand Up @@ -1244,7 +1245,7 @@ class DataSourceV2SQLSuite
Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"),
Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test"),
Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser),
Row("Properties", "((a,b),(b,a),(c,c))"))
Row("Properties", "((a,b), (b,a), (c,c))"))
)
}
}
Expand All @@ -1270,7 +1271,8 @@ class DataSourceV2SQLSuite
assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest")
.toDF("k", "v")
.where("k='Properties'")
.isEmpty, s"$key is a reserved namespace property and ignored")
.where("v=''")
.count == 1, s"$key is a reserved namespace property and ignored")
val meta =
catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest"))
assert(meta.get(key) == null || !meta.get(key).contains("foo"),
Expand All @@ -1290,7 +1292,8 @@ class DataSourceV2SQLSuite
Row("Namespace Name", "ns2"),
Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"),
Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test_2"),
Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser))
Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser),
Row("Properties", ""))
)
}
}
Expand Down
Expand Up @@ -762,7 +762,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
checkAnswer(
sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value")
.where("key not like 'Owner%'"), // filter for consistency with in-memory catalog
Row("Database Name", dbNameWithoutBackTicks) ::
Row("Namespace Name", dbNameWithoutBackTicks) ::
Row("Comment", "") ::
Row("Location", CatalogUtils.URIToString(location)) ::
Row("Properties", "((a,a), (b,b), (c,c))") :: Nil)
Expand All @@ -772,7 +772,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
checkAnswer(
sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value")
.where("key not like 'Owner%'"), // filter for consistency with in-memory catalog
Row("Database Name", dbNameWithoutBackTicks) ::
Row("Namespace Name", dbNameWithoutBackTicks) ::
Row("Comment", "") ::
Row("Location", CatalogUtils.URIToString(location)) ::
Row("Properties", "((a,a), (b,b), (c,c), (d,d))") :: Nil)
Expand Down
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.internal.SQLConf

/**
* This base suite contains unified tests for the `DESCRIBE NAMESPACE` command that check V1 and V2
Expand Down Expand Up @@ -47,4 +48,21 @@ trait DescribeNamespaceSuiteBase extends QueryTest with DDLCommandTestUtils {
// TODO: Move this to DropNamespaceSuite when the test suite is introduced.
sql(s"DROP NAMESPACE IF EXISTS $catalog.$ns")
}

test("Keep the legacy output schema") {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now run for both v1/v2 catalogs.

Seq(true, false).foreach { keepLegacySchema =>
withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> keepLegacySchema.toString) {
val ns = "db1"
withNamespace(ns) {
sql(s"CREATE NAMESPACE $ns")
val schema = sql(s"DESCRIBE NAMESPACE $ns").schema.fieldNames.toSeq
if (keepLegacySchema) {
assert(schema === Seq("database_description_item", "database_description_value"))
} else {
assert(schema === Seq("info_name", "info_value"))
}
}
}
}
}
}
Expand Up @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command.v1

import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.command
import org.apache.spark.sql.internal.SQLConf

/**
* This base suite contains unified tests for the `DESCRIBE NAMESPACE` command that checks V1
Expand All @@ -30,7 +29,8 @@ import org.apache.spark.sql.internal.SQLConf
* - V1 Hive External catalog:
* `org.apache.spark.sql.hive.execution.command.DescribeNamespaceSuite`
*/
trait DescribeNamespaceSuiteBase extends command.DescribeNamespaceSuiteBase {
trait DescribeNamespaceSuiteBase extends command.DescribeNamespaceSuiteBase
with command.TestsV1AndV2Commands {
override def notFoundMsgPrefix: String = "Database"

test("basic") {
Expand All @@ -43,35 +43,21 @@ trait DescribeNamespaceSuiteBase extends command.DescribeNamespaceSuiteBase {
.where("key not like 'Owner%'") // filter for consistency with in-memory catalog
.collect()

val namePrefix = if (conf.useV1Command) "Database" else "Namespace"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be the only difference b/w v1 and v2 command.

assert(result.length == 4)
assert(result(0) === Row("Database Name", ns))
assert(result(0) === Row(s"$namePrefix Name", ns))
assert(result(1) === Row("Comment", ""))
// Check only the key for "Location" since its value depends on warehouse path, etc.
assert(result(2).getString(0) === "Location")
assert(result(3) === Row("Properties", ""))
}
}

test("Keep the legacy output schema") {
Seq(true, false).foreach { keepLegacySchema =>
withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> keepLegacySchema.toString) {
val ns = "db1"
withNamespace(ns) {
sql(s"CREATE NAMESPACE $ns")
val schema = sql(s"DESCRIBE NAMESPACE $ns").schema.fieldNames.toSeq
if (keepLegacySchema) {
assert(schema === Seq("database_description_item", "database_description_value"))
} else {
assert(schema === Seq("info_name", "info_value"))
}
}
}
}
}
}

/**
* The class contains tests for the `DESCRIBE NAMESPACE` command to check V1 In-Memory
* table catalog.
*/
class DescribeNamespaceSuite extends DescribeNamespaceSuiteBase with CommandSuiteBase
class DescribeNamespaceSuite extends DescribeNamespaceSuiteBase with CommandSuiteBase {
override def commandVersion: String = super[DescribeNamespaceSuiteBase].commandVersion
}
Expand Up @@ -23,4 +23,6 @@ import org.apache.spark.sql.execution.command.v1
* The class contains tests for the `DESCRIBE NAMESPACE` command to check V1 Hive external
* table catalog.
*/
class DescribeNamespaceSuite extends v1.DescribeNamespaceSuiteBase with CommandSuiteBase
class DescribeNamespaceSuite extends v1.DescribeNamespaceSuiteBase with CommandSuiteBase {
override def commandVersion: String = super[DescribeNamespaceSuiteBase].commandVersion
}