Commit

Merge branch 'master' of https://github.com/apache/spark into reduce-locations

shivaram committed Mar 3, 2015
2 parents bc4dfd6 + 2db6a85 commit 0171d3c
Showing 324 changed files with 10,954 additions and 4,487 deletions.
16 changes: 16 additions & 0 deletions LICENSE
@@ -771,6 +771,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

========================================================================
For TestTimSort (core/src/test/java/org/apache/spark/util/collection/TestTimSort.java):
========================================================================
Copyright (C) 2015 Stijn de Gouw

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

========================================================================
For LimitedInputStream
6 changes: 3 additions & 3 deletions assembly/pom.xml
@@ -119,9 +119,9 @@
<artifact>org.jblas:jblas</artifact>
<excludes>
<!-- Linux amd64 is OK; not statically linked -->
<exclude>lib/Linux/i386/**</exclude>
<exclude>lib/Mac OS X/**</exclude>
<exclude>lib/Windows/**</exclude>
<exclude>lib/static/Linux/i386/**</exclude>
<exclude>lib/static/Mac OS X/**</exclude>
<exclude>lib/static/Windows/**</exclude>
</excludes>
</filter>
</filters>
9 changes: 9 additions & 0 deletions conf/metrics.properties.template
@@ -122,6 +122,15 @@

#worker.sink.csv.unit=minutes

# Enable Slf4jSink for all instances by class name
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink

# Polling period for Slf4jSink
#*.sink.slf4j.period=1

#*.sink.slf4j.unit=minutes


# Enable jvm source for instance master, worker, driver and executor
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

@@ -425,15 +425,14 @@ private void pushRun(int runBase, int runLen) {
private void mergeCollapse() {
while (stackSize > 1) {
int n = stackSize - 2;
if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) {
if ( (n >= 1 && runLen[n-1] <= runLen[n] + runLen[n+1])
|| (n >= 2 && runLen[n-2] <= runLen[n] + runLen[n-1])) {
if (runLen[n - 1] < runLen[n + 1])
n--;
mergeAt(n);
} else if (runLen[n] <= runLen[n + 1]) {
mergeAt(n);
} else {
} else if (runLen[n] > runLen[n + 1]) {
break; // Invariant is established
}
mergeAt(n);
}
}
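
The strengthened mergeCollapse condition above checks the run-length invariant one level deeper in the pending-run stack, which is the fix exercised by the new TestTimSort file named in the LICENSE addition. As a rough illustration of what that invariant requires, here is a hedged Scala sketch (not part of this commit; all names are illustrative): it reports whether a stack of pending run lengths satisfies runLen(i-2) > runLen(i-1) + runLen(i) and runLen(i-1) > runLen(i) at every position.

// Illustrative sketch only -- not Spark code. Checks the TimSort run-length invariant
// that the strengthened mergeCollapse condition is meant to re-establish after each push.
object TimSortInvariantSketch {
  def holds(runLen: IndexedSeq[Int]): Boolean = {
    val pairsOk = runLen.sliding(2).forall {
      case Seq(a, b) => a > b
      case _         => true
    }
    val triplesOk = runLen.sliding(3).forall {
      case Seq(a, b, c) => a > b + c
      case _            => true
    }
    pairsOk && triplesOk
  }

  def main(args: Array[String]): Unit = {
    println(holds(Vector(120, 80, 25, 20)))  // true: every pair and triple is fine
    println(holds(Vector(120, 100, 25, 20))) // false: 120 <= 100 + 25 breaks the invariant
  }
}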

42 changes: 34 additions & 8 deletions core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -23,6 +23,7 @@ import java.lang.ThreadLocal

import scala.collection.generic.Growable
import scala.collection.mutable.Map
import scala.ref.WeakReference
import scala.reflect.ClassTag

import org.apache.spark.serializer.JavaSerializer
@@ -279,13 +280,24 @@ object AccumulatorParam {

// TODO: The multi-thread support in accumulators is kind of lame; check
// if there's a more intuitive way of doing it right
private[spark] object Accumulators {
// TODO: Use soft references? => need to make readObject work properly then
val originals = Map[Long, Accumulable[_, _]]()
val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
private[spark] object Accumulators extends Logging {
/**
* This global map holds the original accumulator objects that are created on the driver.
* It keeps weak references to these objects so that accumulators can be garbage-collected
* once the RDDs and user-code that reference them are cleaned up.
*/
val originals = Map[Long, WeakReference[Accumulable[_, _]]]()

/**
* This thread-local map holds per-task copies of accumulators; it is used to collect the set
* of accumulator updates to send back to the driver when tasks complete. After tasks complete,
* this map is cleared by `Accumulators.clear()` (see Executor.scala).
*/
private val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
override protected def initialValue() = Map[Long, Accumulable[_, _]]()
}
var lastId: Long = 0

private var lastId: Long = 0

def newId(): Long = synchronized {
lastId += 1
@@ -294,7 +306,7 @@ private[spark] object Accumulators {

def register(a: Accumulable[_, _], original: Boolean): Unit = synchronized {
if (original) {
originals(a.id) = a
originals(a.id) = new WeakReference[Accumulable[_, _]](a)
} else {
localAccums.get()(a.id) = a
}
@@ -303,7 +315,13 @@
// Clear the local (non-original) accumulators for the current thread
def clear() {
synchronized {
localAccums.get.clear
localAccums.get.clear()
}
}

def remove(accId: Long) {
synchronized {
originals.remove(accId)
}
}

@@ -320,7 +338,15 @@
def add(values: Map[Long, Any]): Unit = synchronized {
for ((id, value) <- values) {
if (originals.contains(id)) {
originals(id).asInstanceOf[Accumulable[Any, Any]] ++= value
// Since we are now storing weak references, we must check whether the underlying data
// is valid.
originals(id).get match {
case Some(accum) => accum.asInstanceOf[Accumulable[Any, Any]] ++= value
case None =>
throw new IllegalAccessError("Attempted to access garbage collected Accumulator.")
}
} else {
logWarning(s"Ignoring accumulator update for unknown accumulator id $id")
}
}
}
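
With the change above, `originals` holds WeakReference values, so a driver-side accumulator can be garbage-collected once user code drops it; lookups in `add` must therefore unwrap the reference, and `remove` (invoked from the ContextCleaner) deletes the entry itself. A minimal, hedged sketch of that registry pattern using only the standard library follows; the object and method names are illustrative, not Spark's.

import scala.collection.mutable
import scala.ref.WeakReference

// Illustrative sketch only -- not Spark code. A registry that holds weak references,
// so registered values can still be garbage-collected; lookups return None once the
// referent has been collected, and remove() drops the (possibly cleared) entry.
object WeakRegistrySketch {
  private val entries = mutable.Map[Long, WeakReference[AnyRef]]()
  private var lastId = 0L

  def register(value: AnyRef): Long = synchronized {
    lastId += 1
    entries(lastId) = new WeakReference(value)
    lastId
  }

  def lookup(id: Long): Option[AnyRef] = synchronized {
    entries.get(id).flatMap(_.get)   // None if never registered or already collected
  }

  def remove(id: Long): Unit = synchronized {
    entries.remove(id)
  }
}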
24 changes: 22 additions & 2 deletions core/src/main/scala/org/apache/spark/ContextCleaner.scala
@@ -32,6 +32,7 @@ private sealed trait CleanupTask
private case class CleanRDD(rddId: Int) extends CleanupTask
private case class CleanShuffle(shuffleId: Int) extends CleanupTask
private case class CleanBroadcast(broadcastId: Long) extends CleanupTask
private case class CleanAccum(accId: Long) extends CleanupTask

/**
* A WeakReference associated with a CleanupTask.
@@ -114,6 +115,10 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
registerForCleanup(rdd, CleanRDD(rdd.id))
}

def registerAccumulatorForCleanup(a: Accumulable[_, _]): Unit = {
registerForCleanup(a, CleanAccum(a.id))
}

/** Register a ShuffleDependency for cleanup when it is garbage collected. */
def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) {
registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId))
@@ -145,6 +150,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks)
case CleanBroadcast(broadcastId) =>
doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks)
case CleanAccum(accId) =>
doCleanupAccum(accId, blocking = blockOnCleanupTasks)
}
}
} catch {
@@ -181,15 +188,27 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
/** Perform broadcast cleanup. */
def doCleanupBroadcast(broadcastId: Long, blocking: Boolean) {
try {
logDebug("Cleaning broadcast " + broadcastId)
logDebug(s"Cleaning broadcast $broadcastId")
broadcastManager.unbroadcast(broadcastId, true, blocking)
listeners.foreach(_.broadcastCleaned(broadcastId))
logInfo("Cleaned broadcast " + broadcastId)
logDebug(s"Cleaned broadcast $broadcastId")
} catch {
case e: Exception => logError("Error cleaning broadcast " + broadcastId, e)
}
}

/** Perform accumulator cleanup. */
def doCleanupAccum(accId: Long, blocking: Boolean) {
try {
logDebug("Cleaning accumulator " + accId)
Accumulators.remove(accId)
listeners.foreach(_.accumCleaned(accId))
logInfo("Cleaned accumulator " + accId)
} catch {
case e: Exception => logError("Error cleaning accumulator " + accId, e)
}
}

private def blockManagerMaster = sc.env.blockManager.master
private def broadcastManager = sc.env.broadcastManager
private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
@@ -206,4 +225,5 @@ private[spark] trait CleanerListener {
def rddCleaned(rddId: Int)
def shuffleCleaned(shuffleId: Int)
def broadcastCleaned(broadcastId: Long)
def accumCleaned(accId: Long)
}
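
CleanAccum plugs accumulators into the ContextCleaner's existing mechanism: each registered object is paired with a cleanup task through a weak reference, and a background thread performs the task once the object has been garbage-collected. The hedged sketch below shows that reference-queue pattern in miniature (illustrative names only; the real cleaner adds batching, blocking options, and listener callbacks).

import java.lang.ref.{ReferenceQueue, WeakReference}
import scala.collection.mutable

// Illustrative sketch only -- not Spark code. Pairs each tracked object with a cleanup
// task via a weak reference; a daemon thread runs the task after the object has been
// garbage-collected and the JVM enqueues the cleared reference.
object MiniCleanerSketch {
  private class TaskRef(val task: () => Unit, referent: AnyRef, q: ReferenceQueue[AnyRef])
    extends WeakReference[AnyRef](referent, q)   // referent is not retained as a field

  private val queue = new ReferenceQueue[AnyRef]()
  private val pending = mutable.Set[TaskRef]()   // keeps the TaskRefs themselves reachable

  def register(obj: AnyRef)(task: => Unit): Unit = pending.synchronized {
    pending += new TaskRef(() => task, obj, queue)
  }

  private val worker = new Thread("mini-cleaner-sketch") {
    setDaemon(true)
    override def run(): Unit = {
      while (true) {
        queue.remove() match {                   // blocks until some referent is collected
          case ref: TaskRef =>
            pending.synchronized { pending -= ref }
            ref.task()                           // perform the associated cleanup
          case _ =>
        }
      }
    }
  }
  worker.start()
}
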
@@ -20,6 +20,7 @@ package org.apache.spark
import scala.collection.mutable

import org.apache.spark.scheduler._
import org.apache.spark.util.{SystemClock, Clock}

/**
* An agent that dynamically allocates and removes executors based on the workload.
@@ -123,7 +124,7 @@ private[spark] class ExecutorAllocationManager(
private val intervalMillis: Long = 100

// Clock used to schedule when executors should be added and removed
private var clock: Clock = new RealClock
private var clock: Clock = new SystemClock()

// Listener for Spark events that impact the allocation policy
private val listener = new ExecutorAllocationListener
@@ -588,28 +589,3 @@ private[spark] class ExecutorAllocationManager(
private object ExecutorAllocationManager {
val NOT_SET = Long.MaxValue
}

/**
* An abstract clock for measuring elapsed time.
*/
private trait Clock {
def getTimeMillis: Long
}

/**
* A clock backed by a monotonically increasing time source.
* The time returned by this clock does not correspond to any notion of wall-clock time.
*/
private class RealClock extends Clock {
override def getTimeMillis: Long = System.nanoTime / (1000 * 1000)
}

/**
* A clock that allows the caller to customize the time.
* This is used mainly for testing.
*/
private class TestClock(startTimeMillis: Long) extends Clock {
private var time: Long = startTimeMillis
override def getTimeMillis: Long = time
def tick(ms: Long): Unit = { time += ms }
}
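
This hunk drops ExecutorAllocationManager's private Clock, RealClock, and TestClock in favor of the shared org.apache.spark.util.SystemClock. The general pattern is a small clock abstraction that production code backs with wall-clock time and tests back with a manually advanced clock; a hedged sketch follows (the names below are illustrative, not the actual util.Clock API).

// Illustrative sketch only -- not Spark's util.Clock API. A small clock abstraction:
// production code reads wall-clock time, tests advance a manual clock deterministically.
trait ClockSketch {
  def getTimeMillis: Long
}

class SystemClockSketch extends ClockSketch {
  override def getTimeMillis: Long = System.currentTimeMillis()
}

class ManualClockSketch(private var time: Long = 0L) extends ClockSketch {
  override def getTimeMillis: Long = time
  def advance(ms: Long): Unit = { time += ms }   // tests move time forward explicitly
}
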
65 changes: 59 additions & 6 deletions core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
@@ -17,33 +17,86 @@

package org.apache.spark

import akka.actor.Actor
import scala.concurrent.duration._
import scala.collection.mutable

import akka.actor.{Actor, Cancellable}

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockManagerId
import org.apache.spark.scheduler.TaskScheduler
import org.apache.spark.scheduler.{SlaveLost, TaskScheduler}
import org.apache.spark.util.ActorLogReceive

/**
* A heartbeat from executors to the driver. This is a shared message used by several internal
* components to convey liveness or execution information for in-progress tasks.
* components to convey liveness or execution information for in-progress tasks. It will also
* expire the hosts that have not heartbeated for more than spark.network.timeout.
*/
private[spark] case class Heartbeat(
executorId: String,
taskMetrics: Array[(Long, TaskMetrics)], // taskId -> TaskMetrics
blockManagerId: BlockManagerId)

private[spark] case object ExpireDeadHosts

private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean)

/**
* Lives in the driver to receive heartbeats from executors.
*/
private[spark] class HeartbeatReceiver(scheduler: TaskScheduler)
private[spark] class HeartbeatReceiver(sc: SparkContext, scheduler: TaskScheduler)
extends Actor with ActorLogReceive with Logging {

// executor ID -> timestamp of when the last heartbeat from this executor was received
private val executorLastSeen = new mutable.HashMap[String, Long]

private val executorTimeoutMs = sc.conf.getLong("spark.network.timeout",
sc.conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", 120)) * 1000

private val checkTimeoutIntervalMs = sc.conf.getLong("spark.network.timeoutInterval",
sc.conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60)) * 1000

private var timeoutCheckingTask: Cancellable = null

override def preStart(): Unit = {
import context.dispatcher
timeoutCheckingTask = context.system.scheduler.schedule(0.seconds,
checkTimeoutIntervalMs.milliseconds, self, ExpireDeadHosts)
super.preStart()
}

override def receiveWithLogging = {
case Heartbeat(executorId, taskMetrics, blockManagerId) =>
val response = HeartbeatResponse(
!scheduler.executorHeartbeatReceived(executorId, taskMetrics, blockManagerId))
val unknownExecutor = !scheduler.executorHeartbeatReceived(
executorId, taskMetrics, blockManagerId)
val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
executorLastSeen(executorId) = System.currentTimeMillis()
sender ! response
case ExpireDeadHosts =>
expireDeadHosts()
}

private def expireDeadHosts(): Unit = {
logTrace("Checking for hosts with no recent heartbeats in HeartbeatReceiver.")
val now = System.currentTimeMillis()
for ((executorId, lastSeenMs) <- executorLastSeen) {
if (now - lastSeenMs > executorTimeoutMs) {
logWarning(s"Removing executor $executorId with no recent heartbeats: " +
s"${now - lastSeenMs} ms exceeds timeout $executorTimeoutMs ms")
scheduler.executorLost(executorId, SlaveLost("Executor heartbeat " +
s"timed out after ${now - lastSeenMs} ms"))
if (sc.supportDynamicAllocation) {
sc.killExecutor(executorId)
}
executorLastSeen.remove(executorId)
}
}
}

override def postStop(): Unit = {
if (timeoutCheckingTask != null) {
timeoutCheckingTask.cancel()
}
super.postStop()
}
}
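
The new expireDeadHosts pass scans the executor-to-last-heartbeat map and treats any executor whose last heartbeat is older than the timeout as lost. The policy itself reduces to a pure function over that map; a hedged sketch (illustrative names, not Spark code):

// Illustrative sketch only -- not Spark code. Given last-seen timestamps and a timeout,
// return the executors whose heartbeats have gone quiet for too long.
object HeartbeatExpirySketch {
  def expired(lastSeen: Map[String, Long], nowMs: Long, timeoutMs: Long): List[String] =
    lastSeen.collect { case (id, seenMs) if nowMs - seenMs > timeoutMs => id }.toList

  def main(args: Array[String]): Unit = {
    val lastSeen = Map("exec-1" -> 1000L, "exec-2" -> 119500L)
    // With a 120000 ms timeout and now = 125000 ms, only exec-1 has exceeded the timeout.
    println(expired(lastSeen, nowMs = 125000L, timeoutMs = 120000L))  // List(exec-1)
  }
}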