Skip to content

Commit

Permalink
[SPARK-38236][SQL][3.2][3.1] Check if table location is absolute by "…
Browse files Browse the repository at this point in the history
…new Path(locationUri).isAbsolute" in create/alter table

### What changes were proposed in this pull request?
After #28527, we change to create table under the database location when the table location is relative. However the criteria to determine if a table location is relative/absolute is `URI.isAbsolute`, which basically checks if the table location URI has a scheme defined. So table URIs like `/table/path` are treated as relative and the scheme and authority of the database location URI are used to create the table. For example, when the database location URI is `s3a://bucket/db`, the table will be created at `s3a://bucket/table/path`, while it should be created under the file system defined in `SessionCatalog.hadoopConf` instead.

This change fixes that by treating table location as absolute when the first letter of its path is slash.

This also applies to alter table.

### Why are the changes needed?
This is to fix the behavior described above.

### Does this PR introduce _any_ user-facing change?
Yes. When users try to create/alter a table with a location that starts with a slash but without a scheme defined, the table will be created under/altered to the file system defined in `SessionCatalog.hadoopConf`, instead of the one defined in the database location URI.

### How was this patch tested?
Updated unit tests.

Closes #35591 from bozhang2820/spark-31709-3.2.

Authored-by: Bo Zhang <bo.zhang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
bozhang2820 authored and cloud-fan committed Feb 24, 2022
1 parent 3ee9f78 commit 915f0cc
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ abstract class AvroSuite

override protected def beforeAll(): Unit = {
super.beforeAll()
// initialize SessionCatalog here so it has a clean hadoopConf
spark.sessionState.catalog
spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite =>
.appName("MLlibUnitTest")
.getOrCreate()
sc = spark.sparkContext
// initialize SessionCatalog here so it has a clean hadoopConf
spark.sessionState.catalog

checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString
sc.setCheckpointDir(checkpointDir)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ class SessionCatalog(
private def makeQualifiedTablePath(locationUri: URI, database: String): URI = {
if (locationUri.isAbsolute) {
locationUri
} else if (new Path(locationUri).isAbsolute) {
makeQualifiedPath(locationUri)
} else {
val dbName = formatDatabaseName(database)
val dbLocation = makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, TableCatalog, TableChange, V1Table}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructField, StructType, TimestampType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
Expand Down Expand Up @@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
super.beforeAll()
val catalog = newCatalog()
catalog.createNamespace(Array("db"), emptyProps)
catalog.createNamespace(Array("db2"), emptyProps)
catalog.createNamespace(Array("db2"),
Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava)
catalog.createNamespace(Array("ns"), emptyProps)
catalog.createNamespace(Array("ns2"), emptyProps)
}
Expand Down Expand Up @@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path"))
catalog.dropTable(testIdent)

// absolute path
// absolute path without scheme
properties.put(TableCatalog.PROP_LOCATION, "/absolute/path")
val t3 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table]
assert(t3.catalogTable.location.toString === "file:/absolute/path")
assert(t3.catalogTable.location.toString === "file:///absolute/path")
catalog.dropTable(testIdent)

// absolute path with scheme
properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path")
val t4 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table]
assert(t4.catalogTable.location.toString === "file:/absolute/path")
catalog.dropTable(testIdent)
}

test("tableExists") {
Expand Down Expand Up @@ -686,10 +694,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
TableChange.setProperty(TableCatalog.PROP_LOCATION, "relative/path")).asInstanceOf[V1Table]
assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path"))

// absolute path
// absolute path without scheme
val t3 = catalog.alterTable(testIdent,
TableChange.setProperty(TableCatalog.PROP_LOCATION, "/absolute/path")).asInstanceOf[V1Table]
assert(t3.catalogTable.location.toString === "file:/absolute/path")
assert(t3.catalogTable.location.toString === "file:///absolute/path")

// absolute path with scheme
val t4 = catalog.alterTable(testIdent, TableChange.setProperty(
TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table]
assert(t4.catalogTable.location.toString === "file:/absolute/path")
}

test("dropTable") {
Expand Down

0 comments on commit 915f0cc

Please sign in to comment.