-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-19058][SQL] fix partition related behaviors with DataFrameWriter.saveAsTable #16460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -473,22 +473,26 @@ case class DataSource( | |
| s"Unable to resolve $name given [${plan.output.map(_.name).mkString(", ")}]") | ||
| }.asInstanceOf[Attribute] | ||
| } | ||
| val fileIndex = catalogTable.map(_.identifier).map { tableIdent => | ||
| sparkSession.table(tableIdent).queryExecution.analyzed.collect { | ||
| case LogicalRelation(t: HadoopFsRelation, _, _) => t.location | ||
| }.head | ||
| } | ||
| // For partitioned relation r, r.schema's column ordering can be different from the column | ||
| // ordering of data.logicalPlan (partition columns are all moved after data column). This | ||
| // will be adjusted within InsertIntoHadoopFsRelation. | ||
| val plan = | ||
| InsertIntoHadoopFsRelationCommand( | ||
| outputPath = outputPath, | ||
| staticPartitions = Map.empty, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, in this case, we do not call How did it work without this PR's changes? Does that mean we just rely on Hive to implicitly call
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, we did not refresh anything here, but we will repair the partitions in
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But,
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nvm. It does not store the metadata in catalog. |
||
| customPartitionLocations = Map.empty, | ||
| partitionColumns = columns, | ||
| bucketSpec = bucketSpec, | ||
| fileFormat = format, | ||
| refreshFunction = _ => Unit, // No existing table needs to be refreshed. | ||
| options = options, | ||
| query = data.logicalPlan, | ||
| mode = mode, | ||
| catalogTable = catalogTable) | ||
| catalogTable = catalogTable, | ||
| fileIndex = fileIndex) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we be issuing refresh table instead of refreshing the index directly?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was following the original behavior: https://github.com/apache/spark/pull/16460/files#diff-d99813bd5bbc18277e4090475e4944cfL240 Besides, it's hard to issue refresh table at |
||
| sparkSession.sessionState.executePlan(plan).toRdd | ||
| // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring it. | ||
| copy(userSpecifiedSchema = Some(data.schema.asNullable)).resolveRelation() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,11 +23,11 @@ import org.apache.hadoop.fs.{FileSystem, Path} | |
|
|
||
| import org.apache.spark.internal.io.FileCommitProtocol | ||
| import org.apache.spark.sql._ | ||
| import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable} | ||
| import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTablePartition} | ||
| import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec | ||
| import org.apache.spark.sql.catalyst.expressions.Attribute | ||
| import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | ||
| import org.apache.spark.sql.execution.command.RunnableCommand | ||
| import org.apache.spark.sql.execution.command._ | ||
|
|
||
| /** | ||
| * A command for writing data to a [[HadoopFsRelation]]. Supports both overwriting and appending. | ||
|
|
@@ -37,23 +37,18 @@ import org.apache.spark.sql.execution.command.RunnableCommand | |
| * overwrites: when the spec is empty, all partitions are overwritten. | ||
| * When it covers a prefix of the partition keys, only partitions matching | ||
| * the prefix are overwritten. | ||
| * @param customPartitionLocations mapping of partition specs to their custom locations. The | ||
| * caller should guarantee that exactly those table partitions | ||
| * falling under the specified static partition keys are contained | ||
| * in this map, and that no other partitions are. | ||
| */ | ||
| case class InsertIntoHadoopFsRelationCommand( | ||
| outputPath: Path, | ||
| staticPartitions: TablePartitionSpec, | ||
| customPartitionLocations: Map[TablePartitionSpec, String], | ||
| partitionColumns: Seq[Attribute], | ||
| bucketSpec: Option[BucketSpec], | ||
| fileFormat: FileFormat, | ||
| refreshFunction: Seq[TablePartitionSpec] => Unit, | ||
| options: Map[String, String], | ||
| @transient query: LogicalPlan, | ||
| mode: SaveMode, | ||
| catalogTable: Option[CatalogTable]) | ||
| catalogTable: Option[CatalogTable], | ||
| fileIndex: Option[FileIndex]) | ||
| extends RunnableCommand { | ||
|
|
||
| import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName | ||
|
|
@@ -74,12 +69,30 @@ case class InsertIntoHadoopFsRelationCommand( | |
| val fs = outputPath.getFileSystem(hadoopConf) | ||
| val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) | ||
|
|
||
| val partitionsTrackedByCatalog = sparkSession.sessionState.conf.manageFilesourcePartitions && | ||
| catalogTable.isDefined && | ||
| catalogTable.get.partitionColumnNames.nonEmpty && | ||
| catalogTable.get.tracksPartitionsInCatalog | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also check
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is something I wanna check with @ericl . What if users create a table with partition management, then turn it off, and read this table? If we treat this table as a normal table, then the data in the custom partition path will be ignored. I think we should respect the partition management flag when the table was created, not when the table is read.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, in other parts of the code we assume that the feature is completely disabled when the flag is off. This is probably needed since there is no way to revert a table otherwise.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you mean we should completely ignore the partition information in the metastore, when the flag is off, so that we should also ignore the data in the custom partition path?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I think we should revert to 2.0 behavior as if querying the table from 2.0 |
||
|
|
||
| var initialMatchingPartitions: Seq[TablePartitionSpec] = Nil | ||
| var customPartitionLocations: Map[TablePartitionSpec, String] = Map.empty | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It sounds like we do not need to use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yea it's true, but then the code may look ugly, e.g. |
||
|
|
||
| // When partitions are tracked by the catalog, compute all custom partition locations that | ||
| // may be relevant to the insertion job. | ||
| if (partitionsTrackedByCatalog) { | ||
| val matchingPartitions = sparkSession.sessionState.catalog.listPartitions( | ||
| catalogTable.get.identifier, Some(staticPartitions)) | ||
| initialMatchingPartitions = matchingPartitions.map(_.spec) | ||
| customPartitionLocations = getCustomPartitionLocations( | ||
| fs, catalogTable.get, qualifiedOutputPath, matchingPartitions) | ||
| } | ||
|
|
||
| val pathExists = fs.exists(qualifiedOutputPath) | ||
| val doInsertion = (mode, pathExists) match { | ||
| case (SaveMode.ErrorIfExists, true) => | ||
| throw new AnalysisException(s"path $qualifiedOutputPath already exists.") | ||
| case (SaveMode.Overwrite, true) => | ||
| deleteMatchingPartitions(fs, qualifiedOutputPath) | ||
| deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations) | ||
| true | ||
| case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => | ||
| true | ||
|
|
@@ -98,6 +111,27 @@ case class InsertIntoHadoopFsRelationCommand( | |
| outputPath = outputPath.toString, | ||
| isAppend = isAppend) | ||
|
|
||
| // Callback for updating metastore partition metadata after the insertion job completes. | ||
| def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = { | ||
| if (partitionsTrackedByCatalog) { | ||
| val newPartitions = updatedPartitions.toSet -- initialMatchingPartitions | ||
| if (newPartitions.nonEmpty) { | ||
| AlterTableAddPartitionCommand( | ||
| catalogTable.get.identifier, newPartitions.toSeq.map(p => (p, None)), | ||
| ifNotExists = true).run(sparkSession) | ||
| } | ||
| if (mode == SaveMode.Overwrite) { | ||
| val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions | ||
| if (deletedPartitions.nonEmpty) { | ||
| AlterTableDropPartitionCommand( | ||
| catalogTable.get.identifier, deletedPartitions.toSeq, | ||
| ifExists = true, purge = false, | ||
| retainData = true /* already deleted */).run(sparkSession) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| FileFormatWriter.write( | ||
| sparkSession = sparkSession, | ||
| queryExecution = Dataset.ofRows(sparkSession, query).queryExecution, | ||
|
|
@@ -108,8 +142,10 @@ case class InsertIntoHadoopFsRelationCommand( | |
| hadoopConf = hadoopConf, | ||
| partitionColumns = partitionColumns, | ||
| bucketSpec = bucketSpec, | ||
| refreshFunction = refreshFunction, | ||
| refreshFunction = refreshPartitionsCallback, | ||
| options = options) | ||
|
|
||
| fileIndex.foreach(_.refresh()) | ||
| } else { | ||
| logInfo("Skipping insertion into a relation that already exists.") | ||
| } | ||
|
|
@@ -121,7 +157,10 @@ case class InsertIntoHadoopFsRelationCommand( | |
| * Deletes all partition files that match the specified static prefix. Partitions with custom | ||
| * locations are also cleared based on the custom locations map given to this class. | ||
| */ | ||
| private def deleteMatchingPartitions(fs: FileSystem, qualifiedOutputPath: Path): Unit = { | ||
| private def deleteMatchingPartitions( | ||
| fs: FileSystem, | ||
| qualifiedOutputPath: Path, | ||
| customPartitionLocations: Map[TablePartitionSpec, String]): Unit = { | ||
| val staticPartitionPrefix = if (staticPartitions.nonEmpty) { | ||
| "/" + partitionColumns.flatMap { p => | ||
| staticPartitions.get(p.name) match { | ||
|
|
@@ -152,4 +191,29 @@ case class InsertIntoHadoopFsRelationCommand( | |
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Given a set of input partitions, returns those that have locations that differ from the | ||
| * Hive default (e.g. /k1=v1/k2=v2). These partitions were manually assigned locations by | ||
| * the user. | ||
| * | ||
| * @return a mapping from partition specs to their custom locations | ||
| */ | ||
| private def getCustomPartitionLocations( | ||
| fs: FileSystem, | ||
| table: CatalogTable, | ||
| qualifiedOutputPath: Path, | ||
| partitions: Seq[CatalogTablePartition]): Map[TablePartitionSpec, String] = { | ||
| partitions.flatMap { p => | ||
| val defaultLocation = qualifiedOutputPath.suffix( | ||
| "/" + PartitioningUtils.getPathFragment(p.spec, table.partitionSchema)).toString | ||
| val catalogLocation = new Path(p.location).makeQualified( | ||
| fs.getUri, fs.getWorkingDirectory).toString | ||
| if (catalogLocation != defaultLocation) { | ||
| Some(p.spec -> catalogLocation) | ||
| } else { | ||
| None | ||
| } | ||
| }.toMap | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this already done by the insertion command?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved it from the insertion command to here, as we only need to refresh the table for overwrite. For append, we only need to refresh the FileIndex.