-
Notifications
You must be signed in to change notification settings - Fork 28.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support coalesce table cache stage partitions
- Loading branch information
1 parent
8982cee
commit 229a57c
Showing
19 changed files
with
633 additions
and
156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
sql/core/src/main/scala/org/apache/spark/sql/execution/CachedRDD.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution | ||
|
||
import scala.reflect.ClassTag | ||
|
||
import org.apache.spark.{Dependency, NarrowDependency, Partition, TaskContext} | ||
import org.apache.spark.rdd.RDD | ||
|
||
/**
 * The [[Partition]] used by [[CachedRDD]]: groups several partitions of the
 * underlying cached RDD into one coalesced output partition.
 *
 * @param index position of this partition within the coalesced [[CachedRDD]]
 * @param originalPartitions the parent RDD's partitions folded into this one
 * @param originalPreferredLocations distinct preferred hosts of the parent
 *        partitions; marked @transient because locality is only consulted on
 *        the driver and need not be shipped to executors
 */
case class CachedRDDPartition(
    index: Int,
    originalPartitions: Array[Partition],
    @transient originalPreferredLocations: Seq[String]) extends Partition
|
||
/**
 * Wraps a real cached RDD so that groups of its partitions are read as single
 * coalesced partitions, without re-shuffling the cached data.
 *
 * @param prev the real cached RDD
 * @param partitionSpecs the coalesced partition ranges to apply on top of `prev`
 */
class CachedRDD[T: ClassTag](
    @transient var prev: RDD[T],
    partitionSpecs: Seq[CoalescedPartitionSpec])
  extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies

  override protected def getPartitions: Array[Partition] = {
    partitionSpecs.zipWithIndex.map { case (spec, idx) =>
      // Every parent partition in [startReducerIndex, endReducerIndex) is
      // folded into the idx-th coalesced partition.
      val parents = (spec.startReducerIndex until spec.endReducerIndex)
        .map(prev.partitions).toArray
      val hosts = parents.flatMap(prev.preferredLocations).distinct.toSeq
      CachedRDDPartition(idx, parents, hosts): Partition
    }.toArray
  }

  override protected def getPreferredLocations(split: Partition): Seq[String] =
    split.asInstanceOf[CachedRDDPartition].originalPreferredLocations

  override def compute(split: Partition, context: TaskContext): Iterator[T] = {
    val cachedSplit = split.asInstanceOf[CachedRDDPartition]
    // Concatenate the rows of all parent partitions backing this split.
    cachedSplit.originalPartitions.iterator.flatMap(firstParent[T].iterator(_, context))
  }

  override def getDependencies: Seq[Dependency[_]] = {
    // Narrow dependency: each output partition depends on a fixed subset of
    // parent partitions, so no shuffle is introduced.
    Seq(new NarrowDependency(prev) {
      override def getParents(id: Int): Seq[Int] =
        partitions(id).asInstanceOf[CachedRDDPartition].originalPartitions.map(_.index).toSeq
    })
  }

  override def clearDependencies(): Unit = {
    super.clearDependencies()
    // Drop the parent reference so it can be garbage collected.
    prev = null
  }
}
71 changes: 71 additions & 0 deletions
71
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQECacheReadExec.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution.adaptive | ||
|
||
import org.apache.spark.rdd.RDD | ||
import org.apache.spark.sql.catalyst.InternalRow | ||
import org.apache.spark.sql.catalyst.plans.physical.Partitioning | ||
import org.apache.spark.sql.execution.{CachedRDD, CoalescedPartitionSpec, ShufflePartitionSpec, SparkPlan, SQLExecution} | ||
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} | ||
import org.apache.spark.sql.vectorized.ColumnarBatch | ||
|
||
/**
 * A wrapper of table cache query stage, which follows the given partition arrangement.
 * The RDD cache block is based on partition level, so we can not split the partition if it's
 * skewed. When [[AQECacheReadExec]] happen that means there are some partitions can be coalesced.
 *
 * @param child It should always be [[TableCacheQueryStageExec]].
 * @param partitionSpecs The partition specs that defines the arrangement, requires at least one
 *                       partition.
 */
case class AQECacheReadExec(
    child: SparkPlan,
    partitionSpecs: Seq[ShufflePartitionSpec]) extends AQERead {
  // Cached partitions cannot be split, so only coalesced specs are legal here.
  assert(partitionSpecs.forall(_.isInstanceOf[CoalescedPartitionSpec]))

  override def outputPartitioning: Partitioning =
    outputPartitionWithCoalesced(partitionSpecs.length)

  override lazy val metrics: Map[String, SQLMetric] =
    Map("numPartitions" -> SQLMetrics.createMetric(sparkContext, "number of partitions"))

  // Record the coalesced partition count and publish it to the driver-side UI.
  private def updateMetrics(): Unit = {
    metrics("numPartitions") += partitionSpecs.length
    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq)
  }

  override def stringArgs: Iterator[Any] = Iterator("coalesced")

  // Safe by the assertion above: every spec is a CoalescedPartitionSpec.
  private def coalescedSpecs: Seq[CoalescedPartitionSpec] =
    partitionSpecs.asInstanceOf[Seq[CoalescedPartitionSpec]]

  override protected def doExecute(): RDD[InternalRow] = {
    updateMetrics()
    new CachedRDD(child.execute(), coalescedSpecs)
  }

  override protected def doExecuteColumnar(): RDD[ColumnarBatch] = {
    updateMetrics()
    new CachedRDD(child.executeColumnar(), coalescedSpecs)
  }

  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
    copy(child = newChild)
}
57 changes: 57 additions & 0 deletions
57
sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQERead.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution.adaptive | ||
|
||
import org.apache.spark.sql.catalyst.expressions.Attribute | ||
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition, UnknownPartitioning} | ||
import org.apache.spark.sql.catalyst.trees.CurrentOrigin | ||
import org.apache.spark.sql.execution.{ShufflePartitionSpec, SparkPlan, UnaryExecNode} | ||
|
||
/**
 * Base class for AQE physical operators that re-read a query stage's output
 * under a new partition arrangement. Output, columnar support and row-based
 * support are all delegated to the child unchanged.
 */
abstract class AQERead extends UnaryExecNode {
  def child: SparkPlan
  def partitionSpecs: Seq[ShufflePartitionSpec]

  assert(partitionSpecs.nonEmpty, s"${getClass.getSimpleName} requires at least one partition")

  override final def output: Seq[Attribute] = child.output
  override final def supportsColumnar: Boolean = child.supportsColumnar
  override final def supportsRowBased: Boolean = child.supportsRowBased

  /**
   * Derives the output partitioning after coalescing to `numPartitions`
   * partitions. Coalescing does not move rows between partitions, so the
   * distribution kind is preserved and only the partition count changes.
   */
  def outputPartitionWithCoalesced(numPartitions: Int): Partitioning =
    child.outputPartitioning match {
      case h: HashPartitioning =>
        CurrentOrigin.withOrigin(h.origin)(h.copy(numPartitions = numPartitions))
      case r: RangePartitioning =>
        CurrentOrigin.withOrigin(r.origin)(r.copy(numPartitions = numPartitions))
      // This can only happen for `REBALANCE_PARTITIONS_BY_NONE`, which uses
      // `RoundRobinPartitioning` but we don't need to retain the number of partitions.
      case r: RoundRobinPartitioning =>
        r.copy(numPartitions = numPartitions)
      case other @ SinglePartition =>
        // A single partition can never be coalesced further; reaching here is a bug.
        throw new IllegalStateException(
          s"Unexpected partitioning for coalesced shuffle read: $other")
      case _ =>
        // Spark plugins may have custom partitioning and may replace this operator
        // during the postStageOptimization phase, so return UnknownPartitioning here
        // rather than throw an exception
        UnknownPartitioning(numPartitions)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
...core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceCachePartitions.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.execution.adaptive | ||
|
||
import org.apache.spark.sql.SparkSession | ||
import org.apache.spark.sql.catalyst.rules.Rule | ||
import org.apache.spark.sql.execution.{ShufflePartitionSpec, SparkPlan, UnaryExecNode, UnionExec} | ||
import org.apache.spark.sql.internal.SQLConf | ||
|
||
/**
 * A rule to coalesce the cache partitions based on the statistics, which can
 * avoid many small reduce tasks that hurt performance.
 */
case class CoalesceCachePartitions(session: SparkSession) extends Rule[SparkPlan] {
  override def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.getConf(SQLConf.COALESCE_CACHE_PARTITIONS_ENABLED)) {
      plan
    } else {
      // Hoisted out of the loop: the advisory size is the same for every group.
      val advisorySize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
      val groups = collectCoalesceGroups(plan).map { tableCacheStages =>
        // Reuse `stageIds` for both the group key and the log tag (the original
        // recomputed `tableCacheStages.map(_.id)` a second time).
        val stageIds = tableCacheStages.map(_.id)
        val bytesByPartitionIds = tableCacheStages.map(_.outputStats().map(_.bytesByPartitionId))
        // Table cache stages carry no pre-existing partition specs.
        val inputPartitionSpecs = Seq.fill(bytesByPartitionIds.length)(None)
        (stageIds,
          advisorySize,
          bytesByPartitionIds,
          inputPartitionSpecs,
          s"For table cache stage(${stageIds.mkString(", ")})")
      }
      val specsMap = ShufflePartitionsUtil.coalescePartitionsByGroup(
        groups, session.sparkContext.defaultParallelism)
      if (specsMap.nonEmpty) updateCacheReads(plan, specsMap) else plan
    }
  }

  /** Wraps every stage that received coalesced specs in an [[AQECacheReadExec]]. */
  private def updateCacheReads(
      plan: SparkPlan,
      specsMap: Map[Int, Seq[ShufflePartitionSpec]]): SparkPlan = plan match {
    case stage: TableCacheQueryStageExec if specsMap.contains(stage.id) =>
      AQECacheReadExec(stage, specsMap(stage.id))
    case other => other.mapChildren(updateCacheReads(_, specsMap))
  }

  /**
   * Collects groups of table cache stages whose partitions must be coalesced
   * together. Unary nodes are traversed through; each Union child is a
   * separate group; a subtree qualifies only if ALL of its leaves are table
   * cache stages, otherwise coalescing could break co-partitioning with
   * non-cache leaves.
   */
  private def collectCoalesceGroups(
      plan: SparkPlan): Seq[Seq[TableCacheQueryStageExec]] = plan match {
    case unary: UnaryExecNode => collectCoalesceGroups(unary.child)
    case union: UnionExec => union.children.flatMap(collectCoalesceGroups)
    case p if p.collectLeaves().forall(_.isInstanceOf[TableCacheQueryStageExec]) =>
      collectTableCacheStages(p) :: Nil
    case _ => Seq.empty
  }

  /** Gathers all [[TableCacheQueryStageExec]] nodes in the subtree, in traversal order. */
  private def collectTableCacheStages(plan: SparkPlan): Seq[TableCacheQueryStageExec] = plan match {
    case tableCacheStage: TableCacheQueryStageExec => Seq(tableCacheStage)
    case _ => plan.children.flatMap(collectTableCacheStages)
  }
}
Oops, something went wrong.