[SPARK-32963][SQL] empty string should be consistent for schema name in SparkGetSchemasOperation

### What changes were proposed in this pull request?
This PR makes an empty string for the schema name pattern match the global temp view, the same way it already works for other databases.

It also adds new tests covering different kinds of wildcards to verify SparkGetSchemasOperation.

### Why are the changes needed?

When the schema name is an empty string, it is treated as `.*` and matches all databases in the catalog. However, it cannot match the global temp view, because the empty string is not converted to `.*` on that code path.
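
Concretely, the inconsistency comes from plain `java.util.regex` semantics (a minimal sketch, assuming the null/empty conversion behavior described above):

```scala
import java.util.regex.Pattern

// A null schema pattern is converted to ".*", which matches any database name.
Pattern.compile(".*").matcher("global_temp").matches()  // true

// An empty schema pattern stays an empty regex, which matches only the
// empty string -- so "global_temp" was silently dropped from the results.
Pattern.compile("").matcher("global_temp").matches()    // false
```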

### Does this PR introduce _any_ user-facing change?

Yes. A JDBC call such as `statement.getConnection.getMetaData.getSchemas(null, "")` now also includes the global temp view in the result set.
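
For example (a sketch, assuming a Spark Thrift server reachable at the hypothetical URL below and the Hive JDBC driver on the classpath):

```scala
import java.sql.DriverManager

// Hypothetical connection to a local Spark Thrift server.
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
try {
  // With this change, an empty schema pattern behaves like "%", so the
  // result set also contains the global temp database.
  val rs = conn.getMetaData.getSchemas(null, "")
  while (rs.next()) {
    println(rs.getString("TABLE_SCHEM"))  // e.g. "default", "global_temp"
  }
} finally {
  conn.close()
}
```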

### How was this patch tested?

New tests in SparkMetadataOperationSuite covering empty, wildcard, null, and invalid schema name patterns.

Closes #29834 from yaooqinn/SPARK-32963.

Authored-by: Kent Yao <yaooqinn@hotmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
yaooqinn authored and cloud-fan committed Oct 6, 2020
1 parent ec6fccb commit 17d309d
Showing 2 changed files with 29 additions and 9 deletions.
@@ -77,7 +77,8 @@ private[hive] class SparkGetSchemasOperation(

     val globalTempViewDb = sqlContext.sessionState.catalog.globalTempViewManager.database
     val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName))
-    if (databasePattern.matcher(globalTempViewDb).matches()) {
+    if (schemaName == null || schemaName.isEmpty ||
+        databasePattern.matcher(globalTempViewDb).matches()) {
       rowSet.addRow(Array[AnyRef](globalTempViewDb, DEFAULT_HIVE_CATALOG))
     }
     setState(OperationState.FINISHED)
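
The explicit null/empty guard is needed because Hive's `CLIServiceUtils.patternToRegex` maps a `null` pattern to `.*` but leaves an empty pattern as an empty regex. A rough sketch of its semantics, paraphrased for illustration rather than copied from Hive:

```scala
// Approximation of CLIServiceUtils.patternToRegex for illustration only:
// null -> ".*", '%' -> ".*", '_' -> '.', other characters passed through
// (notably without regex-escaping, which is why a bare "*" fails below).
def patternToRegexSketch(pattern: String): String =
  if (pattern == null) ".*"
  else pattern.flatMap {
    case '%' => ".*"
    case '_' => "."
    case c   => c.toString
  }

patternToRegexSketch(null)   // ".*" -- matches every database name
patternToRegexSketch("")     // ""   -- matches only the empty string
patternToRegexSketch("db%")  // "db.*"
```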
@@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.thriftserver

 import java.sql.{DatabaseMetaData, ResultSet}
 
+import org.apache.hive.service.cli.HiveSQLException
+
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.types._

@@ -28,23 +30,40 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest {

test("Spark's own GetSchemasOperation(SparkGetSchemasOperation)") {
def checkResult(rs: ResultSet, dbNames: Seq[String]): Unit = {
for (i <- dbNames.indices) {
assert(rs.next())
assert(rs.getString("TABLE_SCHEM") === dbNames(i))
val expected = dbNames.iterator
while(rs.next() || expected.hasNext) {
assert(rs.getString("TABLE_SCHEM") === expected.next)
assert(rs.getString("TABLE_CATALOG").isEmpty)
}
// Make sure there are no more elements
assert(!rs.next())
assert(!expected.hasNext, "All expected schemas should be visited")
}

withDatabase("db1", "db2") { statement =>
Seq("CREATE DATABASE db1", "CREATE DATABASE db2").foreach(statement.execute)

val dbs = Seq("db1", "db2", "db33", "db44")
val dbDflts = Seq("default", "global_temp")
withDatabase(dbs: _*) { statement =>
dbs.foreach( db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db"))
val metaData = statement.getConnection.getMetaData

checkResult(metaData.getSchemas(null, "%"), Seq("db1", "db2", "default", "global_temp"))
Seq("", "%", null, ".*", "_*", "_%", ".%") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs ++ dbDflts)
}

Seq("db%", "db*") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs)
}

Seq("db_", "db.") foreach { pattern =>
checkResult(metaData.getSchemas(null, pattern), dbs.take(2))
}

checkResult(metaData.getSchemas(null, "db1"), Seq("db1"))
checkResult(metaData.getSchemas(null, "db_not_exist"), Seq.empty)
checkResult(metaData.getSchemas(null, "db*"), Seq("db1", "db2"))

val e = intercept[HiveSQLException](metaData.getSchemas(null, "*"))
assert(e.getCause.getMessage ===
"Error operating GET_SCHEMAS Dangling meta character '*' near index 0\n*\n^")
}
}
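
The final assertion works because the pattern conversion does not escape regex metacharacters, so a bare `*` reaches `Pattern.compile` as an invalid expression. A standalone illustration outside the Thrift server:

```scala
import java.util.regex.{Pattern, PatternSyntaxException}

// "*" passes through pattern conversion unescaped and is an invalid regex,
// which the operation surfaces as the HiveSQLException asserted above.
try Pattern.compile("*")
catch {
  case e: PatternSyntaxException =>
    println(e.getMessage)  // Dangling meta character '*' near index 0 ...
}
```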

