From c7600c24221d29fde31dca921d9d5863af2666e9 Mon Sep 17 00:00:00 2001 From: Xingbo Jiang Date: Thu, 26 Jul 2018 11:24:01 +0800 Subject: [PATCH] update --- .../scala/org/apache/spark/rdd/MapPartitionsRDD.scala | 9 +++++++++ core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 ++++- .../scala/org/apache/spark/scheduler/ActiveJob.scala | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index d4ad09143e80d..904d9c025629f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -23,6 +23,15 @@ import org.apache.spark.{Partition, TaskContext} /** * An RDD that applies the provided function to every partition of the parent RDD. + * + * @param prev the parent RDD. + * @param f The function used to map a tuple of (TaskContext, partition index, input iterator) to + * an output iterator. + * @param preservesPartitioning Whether the input function preserves the partitioner, which should + * be `false` unless `prev` is a pair RDD and the input function + * doesn't modify the keys. + * @param isFromBarrier Indicates whether this RDD is transformed from an RDDBarrier, a stage + * containing at least one RDDBarrier shall be turned into a barrier stage. */ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag]( var prev: RDD[T], diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 04698132adf83..cbc1143126d8e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1853,8 +1853,11 @@ abstract class RDD[T: ClassTag]( * barrier stage. * * An RDD is in a barrier stage, if at least one of its parent RDD(s), or itself, are mapped from - * a RDDBarrier. This function always returns false for a [[ShuffledRDD]], since a + * an [[RDDBarrier]]. This function always returns false for a [[ShuffledRDD]], since a * [[ShuffledRDD]] indicates start of a new stage. + * + * A [[MapPartitionsRDD]] can be transformed from an [[RDDBarrier]], under that case the + * [[MapPartitionsRDD]] shall be marked as barrier. */ private[spark] def isBarrier(): Boolean = isBarrier_ diff --git a/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala index b3bbfb1683c5c..6e4d062749d5f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala @@ -61,7 +61,7 @@ private[spark] class ActiveJob( var numFinished = 0 - // Mark all the partitions of the stage to be not finished. + /** Resets the status of all partitions in this stage so they are marked as not finished. */ def resetAllPartitions(): Unit = { (0 until numPartitions).foreach(finished.update(_, false)) numFinished = 0