[SPARK-22537][core] Aggregation of map output statistics on driver faces single point bottleneck #19763

Closed
wants to merge 13 commits
38 changes: 35 additions & 3 deletions core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -23,11 +23,14 @@ import java.util.zip.{GZIPInputStream, GZIPOutputStream}

import scala.collection.JavaConverters._
import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map}
import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.broadcast.{Broadcast, BroadcastManager}
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv}
import org.apache.spark.scheduler.MapStatus
import org.apache.spark.shuffle.MetadataFetchFailedException
@@ -472,16 +475,45 @@ private[spark] class MapOutputTrackerMaster(
    shuffleStatuses.get(shuffleId).map(_.findMissingPartitions())
  }

  /**
   * Try to equally divide Range(0, num) into divisor slices.
   */
  def equallyDivide(num: Int, divisor: Int): Iterator[Seq[Int]] = {
    assert(divisor > 0, "Divisor should be positive")
    val (each, remain) = (num / divisor, num % divisor)
    val (smaller, bigger) = (0 until num).splitAt((divisor - remain) * each)
cloud-fan (Contributor), Nov 20, 2017:

can you add some comment to describe the algorithm? I'd expect something like:

to equally divide n elements to m buckets
each bucket should have n/m elements
for the remaining n%m elements
pick the first n%m buckets and add one more element
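
For example, with 17 elements and 5 buckets: 17 / 5 = 3 elements per bucket and 17 % 5 = 2 remaining, so the first two buckets get 4 elements each and the other three get 3.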

Author:

Sure : )

Contributor:

my proposal

def equallyDivide(numElements: Int, numBuckets: Int): Iterator[Seq[Int]] = {
  val elementsPerBucket = numElements / numBuckets
  val remaining = numElements % numBuckets
  if (remaining == 0) {
    0.until(numElements).grouped(elementsPerBucket)
  } else {
    val splitPoint = (elementsPerBucket + 1) * remaining
    0.until(splitPoint).grouped(elementsPerBucket + 1) ++
      splitPoint.until(numElements).grouped(elementsPerBucket)
  }
}

    if (each != 0) {
      smaller.grouped(each) ++ bigger.grouped(each + 1)
    } else {
      bigger.grouped(each + 1)
    }
  }
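
A quick illustration of the slicing above (not part of the diff; tracker is assumed to be a MapOutputTrackerMaster, as in the test suite at the bottom). Note that the larger slices go to the last remain buckets rather than the first ones:

// num = 10, divisor = 3: each = 3, remain = 1, split point = (3 - 1) * 3 = 6
tracker.equallyDivide(10, 3).toList
// => slices (0, 1, 2), (3, 4, 5), (6, 7, 8, 9), i.e. sizes 3, 3, 4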

  /**
   * Return statistics about all of the outputs for a given shuffle.
   */
  def getStatistics(dep: ShuffleDependency[_, _, _]): MapOutputStatistics = {
    shuffleStatuses(dep.shuffleId).withMapStatuses { statuses =>
      val totalSizes = new Array[Long](dep.partitioner.numPartitions)
      if (statuses.length * totalSizes.length <=
          conf.get(SHUFFLE_MAP_OUTPUT_STATISTICS_PARALLEL_AGGREGATION_THRESHOLD)) {
Contributor:

nit:

val parallelAggThreshold = ...
if (statuses.length * totalSizes.length < parallelAggThreshold)

        for (s <- statuses) {
          for (i <- 0 until totalSizes.length) {
            totalSizes(i) += s.getSizeForBlock(i)
          }
        }
      } else {
        val parallelism = conf.get(SHUFFLE_MAP_OUTPUT_STATISTICS_CORES)
        val threadPool = ThreadUtils.newDaemonFixedThreadPool(parallelism, "map-output-statistics")
Member:

please put threadPool.shutdown in finally to shut down the thread pool
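
A rough sketch of what this suggestion amounts to (illustrative only, not the code in this diff; all names are taken from the snippet below):

// Sketch only: wrap the parallel aggregation so the pool is always released.
val threadPool = ThreadUtils.newDaemonFixedThreadPool(parallelism, "map-output-statistics")
try {
  val executionContext = ExecutionContext.fromExecutor(threadPool)
  val mapStatusSubmitTasks = equallyDivide(totalSizes.length, parallelism).map {
    reduceIds => Future {
      for (s <- statuses; i <- reduceIds) {
        totalSizes(i) += s.getSizeForBlock(i)
      }
    } (executionContext)
  }
  ThreadUtils.awaitResult(Future.sequence(mapStatusSubmitTasks), Duration.Inf)
} finally {
  // Shut the pool down even if a task fails or awaitResult throws.
  threadPool.shutdown()
}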

        val executionContext = ExecutionContext.fromExecutor(threadPool)
        val mapStatusSubmitTasks = equallyDivide(totalSizes.length, parallelism).map {
          reduceIds => Future {
            for (s <- statuses; i <- reduceIds) {
              totalSizes(i) += s.getSizeForBlock(i)
            }
          } (executionContext)
        }
        ThreadUtils.awaitResult(Future.sequence(mapStatusSubmitTasks), Duration.Inf)
      }
      new MapOutputStatistics(dep.shuffleId, totalSizes)
    }
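
To put the threshold introduced below in perspective (illustrative numbers only): a shuffle with 5,000 map tasks and 1,000 reduce partitions needs 5,000,000 getSizeForBlock calls and stays on the single-threaded path, while one with 50,000 map tasks and 10,000 partitions needs 500,000,000 calls, exceeds the default threshold of 10,000,000, and is spread over the 8-thread aggregation pool.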
16 changes: 16 additions & 0 deletions core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -485,4 +485,20 @@ package object config {
        "array in the sorter.")
      .intConf
      .createWithDefault(Integer.MAX_VALUE)

  private[spark] val SHUFFLE_MAP_OUTPUT_STATISTICS_PARALLEL_AGGREGATION_THRESHOLD =
Contributor:

spark.adaptive.map.statistics.cores should also be a config entry like this

Author:

spark.sql.adaptive.xxx already exists, will this be a problem?

Contributor:

Really? I grepped the code base but can't find it.

Author:

I think that's not a big problem; adaptive execution needs both core and SQL code, so both confs are needed.

Contributor:

I don't get it. You showed me that spark.sql.adaptive.xxx has config entries, so why doesn't spark.adaptive.map.statistics.cores need a config entry?

Author:

spark.adaptive.map.statistics.cores needs a config entry, but I thought the adaptive.xxx items had already been put under spark.sql., so it might be inconsistent. Now I think it's no big deal.

Author:

There is also a spark.shuffle.mapOutput.dispatcher.numThreads in this file without a config entry; do I need to add one?

Contributor:

Yeah, let's add it. BTW, shall we also use mapOutput instead of mapOutputStatistics?
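
For reference, such an entry could look roughly like the following; the constant name, doc text, and default value are assumptions for illustration and are not part of this PR:

// Hypothetical sketch only -- name, doc and default are assumed, not taken from the PR.
private[spark] val SHUFFLE_MAP_OUTPUT_DISPATCHER_NUM_THREADS =
  ConfigBuilder("spark.shuffle.mapOutput.dispatcher.numThreads")
    .internal()
    .doc("Number of threads used to dispatch map output status requests on the driver.")
    .intConf
    .createWithDefault(8)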

Author:

Actually there are 3 confs like that... do we need to add entries for all of them?

ConfigBuilder("spark.shuffle.mapOutputStatistics.parallelAggregationThreshold")
.internal()
.doc("Multi-thread is used when the number of mappers * shuffle partitions exceeds this " +
"threshold.")
.intConf
.createWithDefault(10000000)

  private[spark] val SHUFFLE_MAP_OUTPUT_STATISTICS_CORES =
    ConfigBuilder("spark.shuffle.mapOutputStatistics.cores")
Contributor:

nit: cores -> parallelism

      .internal()
      .doc("The number of cores to use for parallel aggregation of map output statistics.")
      .intConf
      .createWithDefault(8)

}
23 changes: 23 additions & 0 deletions core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala
@@ -275,4 +275,27 @@ class MapOutputTrackerSuite extends SparkFunSuite {
    }
  }

test("equally divide map statistics tasks") {
val func = newTrackerMaster().equallyDivide _
val cases = Seq((0, 5), (4, 5), (15, 5), (16, 5), (17, 5), (18, 5), (19, 5), (20, 5))
val expects = Seq(
Seq(0, 0, 0, 0, 0),
Seq(1, 1, 1, 1, 0),
Seq(3, 3, 3, 3, 3),
Seq(3, 3, 3, 3, 4),
Seq(3, 3, 3, 4, 4),
Seq(3, 3, 4, 4, 4),
Seq(3, 4, 4, 4, 4),
Seq(4, 4, 4, 4, 4))
cases.zip(expects).foreach { case ((num, divisor), expect) =>
val answer = func(num, divisor).toSeq
var wholeSplit = (0 until num)
answer.zip(expect).foreach { case (split, expectSplitLength) =>
val (currentSplit, rest) = wholeSplit.splitAt(expectSplitLength)
assert(currentSplit.toSet == split.toSet)
wholeSplit = rest
}
}
}

}