[SPARK-32963][SQL] empty string should be consistent for schema name in SparkGetSchemasOperation

### What changes were proposed in this pull request?
This PR makes an empty string for the schema name pattern match the global temp view, the same way it already works for other databases.

It also adds new tests covering different kinds of wildcards to verify SparkGetSchemasOperation.

### Why are the changes needed?

When the schema name is an empty string, it is treated as `.*` and matches all databases in the catalog. However, it cannot match the global temp view, because the empty string is not converted to `.*` on that code path.
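
Concretely, the inconsistency comes from plain `java.util.regex` semantics (a minimal sketch, assuming the null/empty conversion behavior described above):

```scala
import java.util.regex.Pattern

// A null schema pattern is converted to ".*", which matches any database name.
Pattern.compile(".*").matcher("global_temp").matches()  // true

// An empty schema pattern stays an empty regex, which matches only the
// empty string -- so "global_temp" was silently dropped from the results.
Pattern.compile("").matcher("global_temp").matches()    // false
```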

### Does this PR introduce _any_ user-facing change?

Yes. A JDBC call such as `statement.getConnection.getMetaData.getSchemas(null, "")` now also includes the global temp view in the result set.
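
For example (a sketch, assuming a Spark Thrift server reachable at the hypothetical URL below and the Hive JDBC driver on the classpath):

```scala
import java.sql.DriverManager

// Hypothetical connection to a local Spark Thrift server.
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
try {
  // With this change, an empty schema pattern behaves like "%", so the
  // result set also contains the global temp database.
  val rs = conn.getMetaData.getSchemas(null, "")
  while (rs.next()) {
    println(rs.getString("TABLE_SCHEM"))  // e.g. "default", "global_temp"
  }
} finally {
  conn.close()
}
```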

### How was this patch tested?

New tests in SparkMetadataOperationSuite covering empty, wildcard, null, and invalid schema name patterns.

Closes #29834 from yaooqinn/SPARK-32963.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
yaooqinn authored and cloud-fan committed Oct 6, 2020
1 parent ec6fccb commit 17d309d
Showing 2 changed files with 29 additions and 9 deletions.
@@ -77,7 +77,8 @@ private[hive] class SparkGetSchemasOperation(

     val globalTempViewDb = sqlContext.sessionState.catalog.globalTempViewManager.database
     val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName))
-    if (databasePattern.matcher(globalTempViewDb).matches()) {
+    if (schemaName == null || schemaName.isEmpty ||
+        databasePattern.matcher(globalTempViewDb).matches()) {
       rowSet.addRow(Array[AnyRef](globalTempViewDb, DEFAULT_HIVE_CATALOG))
     }
     setState(OperationState.FINISHED)
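
The explicit null/empty guard is needed because Hive's `CLIServiceUtils.patternToRegex` maps a `null` pattern to `.*` but leaves an empty pattern as an empty regex. A rough sketch of its semantics, paraphrased for illustration rather than copied from Hive:

```scala
// Approximation of CLIServiceUtils.patternToRegex for illustration only:
// null -> ".*", '%' -> ".*", '_' -> '.', other characters passed through
// (notably without regex-escaping, which is why a bare "*" fails below).
def patternToRegexSketch(pattern: String): String =
  if (pattern == null) ".*"
  else pattern.flatMap {
    case '%' => ".*"
    case '_' => "."
    case c   => c.toString
  }

patternToRegexSketch(null)   // ".*" -- matches every database name
patternToRegexSketch("")     // ""   -- matches only the empty string
patternToRegexSketch("db%")  // "db.*"
```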
@@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.thriftserver

 import java.sql.{DatabaseMetaData, ResultSet}
 
+import org.apache.hive.service.cli.HiveSQLException
+
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.types._

@@ -28,23 +30,40 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest {

test("Spark's own GetSchemasOperation(SparkGetSchemasOperation)") {
def checkResult(rs: ResultSet, dbNames: Seq[String]): Unit = {
for (i <- dbNames.indices) {
assert(rs.next())
assert(rs.getString("TABLE_SCHEM") === dbNames(i))
val expected = dbNames.iterator
while(rs.next() || expected.hasNext) {
assert(rs.getString("TABLE_SCHEM") === expected.next)
assert(rs.getString("TABLE_CATALOG").isEmpty)
}
// Make sure there are no more elements
assert(!rs.next())
assert(!expected.hasNext, "All expected schemas should be visited")
}

withDatabase("db1", "db2") { statement =>
Seq("CREATE DATABASE db1", "CREATE DATABASE db2").foreach(statement.execute)

val dbs = Seq("db1", "db2", "db33", "db44")
val dbDflts = Seq("default", "global_temp")
withDatabase(dbs: _*) { statement =>
dbs.foreach( db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db"))
val metaData = statement.getConnection.getMetaData

checkResult(metaData.getSchemas(null, "%"), Seq("db1", "db2", "default", "global_temp"))
Seq("", "%", null, ".*", "_*", "_%", ".%") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs ++ dbDflts)
}

Seq("db%", "db*") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs)
}

Seq("db_", "db.") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs.take(2))
}

checkResult(metaData.getSchemas(null, "db1"), Seq("db1"))
checkResult(metaData.getSchemas(null, "db_not_exist"), Seq.empty)
checkResult(metaData.getSchemas(null, "db*"), Seq("db1", "db2"))

val e = intercept[HiveSQLException](metaData.getSchemas(null, "*"))
assert(e.getCause.getMessage ===
"Error operating GET_SCHEMAS Dangling meta character '*' near index 0\n*\n^")
}
}
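
The final assertion works because the pattern conversion does not escape regex metacharacters, so a bare `*` reaches `Pattern.compile` as an invalid expression. A standalone illustration outside the Thrift server:

```scala
import java.util.regex.{Pattern, PatternSyntaxException}

// "*" passes through pattern conversion unescaped and is an invalid regex,
// which the operation surfaces as the HiveSQLException asserted above.
try Pattern.compile("*")
catch {
  case e: PatternSyntaxException =>
    println(e.getMessage)  // Dangling meta character '*' near index 0 ...
}
```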

