Commit

Merge branch 'master' of https://github.com/apache/spark into reduce-locations

shivaram committed Mar 3, 2015
2 parents bc4dfd6 + 2db6a85 commit 0171d3c
Showing 324 changed files with 10,954 additions and 4,487 deletions.
16 changes: 16 additions & 0 deletions LICENSE
@@ -771,6 +771,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

========================================================================
For TestTimSort (core/src/test/java/org/apache/spark/util/collection/TestTimSort.java):
========================================================================
Copyright (C) 2015 Stijn de Gouw

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

========================================================================
For LimitedInputStream
6 changes: 3 additions & 3 deletions assembly/pom.xml
@@ -119,9 +119,9 @@
<artifact>org.jblas:jblas</artifact>
<excludes>
<!-- Linux amd64 is OK; not statically linked -->
<exclude>lib/Linux/i386/**</exclude>
<exclude>lib/Mac OS X/**</exclude>
<exclude>lib/Windows/**</exclude>
<exclude>lib/static/Linux/i386/**</exclude>
<exclude>lib/static/Mac OS X/**</exclude>
<exclude>lib/static/Windows/**</exclude>
</excludes>
</filter>
</filters>
9 changes: 9 additions & 0 deletions conf/metrics.properties.template
@@ -122,6 +122,15 @@

#worker.sink.csv.unit=minutes

# Enable Slf4jSink for all instances by class name
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink

# Polling period for Slf4jSink
#*.sink.slf4j.period=1

#*.sink.slf4j.unit=minutes


# Enable jvm source for instance master, worker, driver and executor
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

@@ -425,15 +425,14 @@ private void pushRun(int runBase, int runLen) {
private void mergeCollapse() {
while (stackSize > 1) {
int n = stackSize - 2;
if (n > 0 && runLen[n-1] <= runLen[n] + runLen[n+1]) {
if ( (n >= 1 && runLen[n-1] <= runLen[n] + runLen[n+1])
|| (n >= 2 && runLen[n-2] <= runLen[n] + runLen[n-1])) {
if (runLen[n - 1] < runLen[n + 1])
n--;
mergeAt(n);
} else if (runLen[n] <= runLen[n + 1]) {
mergeAt(n);
} else {
} else if (runLen[n] > runLen[n + 1]) {
break; // Invariant is established
}
mergeAt(n);
}
}
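
The strengthened mergeCollapse condition above checks the run-length invariant one level deeper in the pending-run stack, which is the fix exercised by the new TestTimSort file named in the LICENSE addition. As a rough illustration of what that invariant requires, here is a hedged Scala sketch (not part of this commit; all names are illustrative): it reports whether a stack of pending run lengths satisfies runLen(i-2) > runLen(i-1) + runLen(i) and runLen(i-1) > runLen(i) at every position.

// Illustrative sketch only -- not Spark code. Checks the TimSort run-length invariant
// that the strengthened mergeCollapse condition is meant to re-establish after each push.
object TimSortInvariantSketch {
  def holds(runLen: IndexedSeq[Int]): Boolean = {
    val pairsOk = runLen.sliding(2).forall {
      case Seq(a, b) => a > b
      case _         => true
    }
    val triplesOk = runLen.sliding(3).forall {
      case Seq(a, b, c) => a > b + c
      case _            => true
    }
    pairsOk && triplesOk
  }

  def main(args: Array[String]): Unit = {
    println(holds(Vector(120, 80, 25, 20)))  // true: every pair and triple is fine
    println(holds(Vector(120, 100, 25, 20))) // false: 120 <= 100 + 25 breaks the invariant
  }
}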

42 changes: 34 additions & 8 deletions core/src/main/scala/org/apache/spark/Accumulators.scala
@@ -23,6 +23,7 @@ import java.lang.ThreadLocal

import scala.collection.generic.Growable
import scala.collection.mutable.Map
import scala.ref.WeakReference
import scala.reflect.ClassTag

import org.apache.spark.serializer.JavaSerializer
@@ -279,13 +280,24 @@ object AccumulatorParam {

// TODO: The multi-thread support in accumulators is kind of lame; check
// if there's a more intuitive way of doing it right
private[spark] object Accumulators {
// TODO: Use soft references? => need to make readObject work properly then
val originals = Map[Long, Accumulable[_, _]]()
val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
private[spark] object Accumulators extends Logging {
/**
* This global map holds the original accumulator objects that are created on the driver.
* It keeps weak references to these objects so that accumulators can be garbage-collected
* once the RDDs and user-code that reference them are cleaned up.
*/
val originals = Map[Long, WeakReference[Accumulable[_, _]]]()

/**
* This thread-local map holds per-task copies of accumulators; it is used to collect the set
* of accumulator updates to send back to the driver when tasks complete. After tasks complete,
* this map is cleared by `Accumulators.clear()` (see Executor.scala).
*/
private val localAccums = new ThreadLocal[Map[Long, Accumulable[_, _]]]() {
override protected def initialValue() = Map[Long, Accumulable[_, _]]()
}
var lastId: Long = 0

private var lastId: Long = 0

def newId(): Long = synchronized {
lastId += 1
@@ -294,7 +306,7 @@ private[spark] object Accumulators {

def register(a: Accumulable[_, _], original: Boolean): Unit = synchronized {
if (original) {
originals(a.id) = a
originals(a.id) = new WeakReference[Accumulable[_, _]](a)
} else {
localAccums.get()(a.id) = a
}
@@ -303,7 +315,13 @@
// Clear the local (non-original) accumulators for the current thread
def clear() {
synchronized {
localAccums.get.clear
localAccums.get.clear()
}
}

def remove(accId: Long) {
synchronized {
originals.remove(accId)
}
}

@@ -320,7 +338,15 @@
def add(values: Map[Long, Any]): Unit = synchronized {
for ((id, value) <- values) {
if (originals.contains(id)) {
originals(id).asInstanceOf[Accumulable[Any, Any]] ++= value
// Since we are now storing weak references, we must check whether the underlying data
// is valid.
originals(id).get match {
case Some(accum) => accum.asInstanceOf[Accumulable[Any, Any]] ++= value
case None =>
throw new IllegalAccessError("Attempted to access garbage collected Accumulator.")
}
} else {
logWarning(s"Ignoring accumulator update for unknown accumulator id $id")
}
}
}
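
With the change above, `originals` holds WeakReference values, so a driver-side accumulator can be garbage-collected once user code drops it; lookups in `add` must therefore unwrap the reference, and `remove` (invoked from the ContextCleaner) deletes the entry itself. A minimal, hedged sketch of that registry pattern using only the standard library follows; the object and method names are illustrative, not Spark's.

import scala.collection.mutable
import scala.ref.WeakReference

// Illustrative sketch only -- not Spark code. A registry that holds weak references,
// so registered values can still be garbage-collected; lookups return None once the
// referent has been collected, and remove() drops the (possibly cleared) entry.
object WeakRegistrySketch {
  private val entries = mutable.Map[Long, WeakReference[AnyRef]]()
  private var lastId = 0L

  def register(value: AnyRef): Long = synchronized {
    lastId += 1
    entries(lastId) = new WeakReference(value)
    lastId
  }

  def lookup(id: Long): Option[AnyRef] = synchronized {
    entries.get(id).flatMap(_.get)   // None if never registered or already collected
  }

  def remove(id: Long): Unit = synchronized {
    entries.remove(id)
  }
}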
24 changes: 22 additions & 2 deletions core/src/main/scala/org/apache/spark/ContextCleaner.scala
@@ -32,6 +32,7 @@ private sealed trait CleanupTask
private case class CleanRDD(rddId: Int) extends CleanupTask
private case class CleanShuffle(shuffleId: Int) extends CleanupTask
private case class CleanBroadcast(broadcastId: Long) extends CleanupTask
private case class CleanAccum(accId: Long) extends CleanupTask

/**
* A WeakReference associated with a CleanupTask.
@@ -114,6 +115,10 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
registerForCleanup(rdd, CleanRDD(rdd.id))
}

def registerAccumulatorForCleanup(a: Accumulable[_, _]): Unit = {
registerForCleanup(a, CleanAccum(a.id))
}

/** Register a ShuffleDependency for cleanup when it is garbage collected. */
def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) {
registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId))
@@ -145,6 +150,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks)
case CleanBroadcast(broadcastId) =>
doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks)
case CleanAccum(accId) =>
doCleanupAccum(accId, blocking = blockOnCleanupTasks)
}
}
} catch {
@@ -181,15 +188,27 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
/** Perform broadcast cleanup. */
def doCleanupBroadcast(broadcastId: Long, blocking: Boolean) {
try {
logDebug("Cleaning broadcast " + broadcastId)
logDebug(s"Cleaning broadcast $broadcastId")
broadcastManager.unbroadcast(broadcastId, true, blocking)
listeners.foreach(_.broadcastCleaned(broadcastId))
logInfo("Cleaned broadcast " + broadcastId)
logDebug(s"Cleaned broadcast $broadcastId")
} catch {
case e: Exception => logError("Error cleaning broadcast " + broadcastId, e)
}
}

/** Perform accumulator cleanup. */
def doCleanupAccum(accId: Long, blocking: Boolean) {
try {
logDebug("Cleaning accumulator " + accId)
Accumulators.remove(accId)
listeners.foreach(_.accumCleaned(accId))
logInfo("Cleaned accumulator " + accId)
} catch {
case e: Exception => logError("Error cleaning accumulator " + accId, e)
}
}

private def blockManagerMaster = sc.env.blockManager.master
private def broadcastManager = sc.env.broadcastManager
private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
@@ -206,4 +225,5 @@ private[spark] trait CleanerListener {
def rddCleaned(rddId: Int)
def shuffleCleaned(shuffleId: Int)
def broadcastCleaned(broadcastId: Long)
def accumCleaned(accId: Long)
}
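
CleanAccum plugs accumulators into the ContextCleaner's existing mechanism: each registered object is paired with a cleanup task through a weak reference, and a background thread performs the task once the object has been garbage-collected. The hedged sketch below shows that reference-queue pattern in miniature (illustrative names only; the real cleaner adds batching, blocking options, and listener callbacks).

import java.lang.ref.{ReferenceQueue, WeakReference}
import scala.collection.mutable

// Illustrative sketch only -- not Spark code. Pairs each tracked object with a cleanup
// task via a weak reference; a daemon thread runs the task after the object has been
// garbage-collected and the JVM enqueues the cleared reference.
object MiniCleanerSketch {
  private class TaskRef(val task: () => Unit, referent: AnyRef, q: ReferenceQueue[AnyRef])
    extends WeakReference[AnyRef](referent, q)   // referent is not retained as a field

  private val queue = new ReferenceQueue[AnyRef]()
  private val pending = mutable.Set[TaskRef]()   // keeps the TaskRefs themselves reachable

  def register(obj: AnyRef)(task: => Unit): Unit = pending.synchronized {
    pending += new TaskRef(() => task, obj, queue)
  }

  private val worker = new Thread("mini-cleaner-sketch") {
    setDaemon(true)
    override def run(): Unit = {
      while (true) {
        queue.remove() match {                   // blocks until some referent is collected
          case ref: TaskRef =>
            pending.synchronized { pending -= ref }
            ref.task()                           // perform the associated cleanup
          case _ =>
        }
      }
    }
  }
  worker.start()
}
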
@@ -20,6 +20,7 @@ package org.apache.spark
import scala.collection.mutable

import org.apache.spark.scheduler._
import org.apache.spark.util.{SystemClock, Clock}

/**
* An agent that dynamically allocates and removes executors based on the workload.
@@ -123,7 +124,7 @@ private[spark] class ExecutorAllocationManager(
private val intervalMillis: Long = 100

// Clock used to schedule when executors should be added and removed
private var clock: Clock = new RealClock
private var clock: Clock = new SystemClock()

// Listener for Spark events that impact the allocation policy
private val listener = new ExecutorAllocationListener
@@ -588,28 +589,3 @@ private[spark] class ExecutorAllocationManager(
private object ExecutorAllocationManager {
val NOT_SET = Long.MaxValue
}

/**
* An abstract clock for measuring elapsed time.
*/
private trait Clock {
def getTimeMillis: Long
}

/**
* A clock backed by a monotonically increasing time source.
* The time returned by this clock does not correspond to any notion of wall-clock time.
*/
private class RealClock extends Clock {
override def getTimeMillis: Long = System.nanoTime / (1000 * 1000)
}

/**
* A clock that allows the caller to customize the time.
* This is used mainly for testing.
*/
private class TestClock(startTimeMillis: Long) extends Clock {
private var time: Long = startTimeMillis
override def getTimeMillis: Long = time
def tick(ms: Long): Unit = { time += ms }
}
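
This hunk drops ExecutorAllocationManager's private Clock, RealClock, and TestClock in favor of the shared org.apache.spark.util.SystemClock. The general pattern is a small clock abstraction that production code backs with wall-clock time and tests back with a manually advanced clock; a hedged sketch follows (the names below are illustrative, not the actual util.Clock API).

// Illustrative sketch only -- not Spark's util.Clock API. A small clock abstraction:
// production code reads wall-clock time, tests advance a manual clock deterministically.
trait ClockSketch {
  def getTimeMillis: Long
}

class SystemClockSketch extends ClockSketch {
  override def getTimeMillis: Long = System.currentTimeMillis()
}

class ManualClockSketch(private var time: Long = 0L) extends ClockSketch {
  override def getTimeMillis: Long = time
  def advance(ms: Long): Unit = { time += ms }   // tests move time forward explicitly
}
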
65 changes: 59 additions & 6 deletions core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
@@ -17,33 +17,86 @@

package org.apache.spark

import akka.actor.Actor
import scala.concurrent.duration._
import scala.collection.mutable

import akka.actor.{Actor, Cancellable}

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockManagerId
import org.apache.spark.scheduler.TaskScheduler
import org.apache.spark.scheduler.{SlaveLost, TaskScheduler}
import org.apache.spark.util.ActorLogReceive

/**
* A heartbeat from executors to the driver. This is a shared message used by several internal
* components to convey liveness or execution information for in-progress tasks.
* components to convey liveness or execution information for in-progress tasks. It will also
* expire the hosts that have not heartbeated for more than spark.network.timeout.
*/
private[spark] case class Heartbeat(
executorId: String,
taskMetrics: Array[(Long, TaskMetrics)], // taskId -> TaskMetrics
blockManagerId: BlockManagerId)

private[spark] case object ExpireDeadHosts

private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean)

/**
* Lives in the driver to receive heartbeats from executors.
*/
private[spark] class HeartbeatReceiver(scheduler: TaskScheduler)
private[spark] class HeartbeatReceiver(sc: SparkContext, scheduler: TaskScheduler)
extends Actor with ActorLogReceive with Logging {

// executor ID -> timestamp of when the last heartbeat from this executor was received
private val executorLastSeen = new mutable.HashMap[String, Long]

private val executorTimeoutMs = sc.conf.getLong("spark.network.timeout",
sc.conf.getLong("spark.storage.blockManagerSlaveTimeoutMs", 120)) * 1000

private val checkTimeoutIntervalMs = sc.conf.getLong("spark.network.timeoutInterval",
sc.conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60)) * 1000

private var timeoutCheckingTask: Cancellable = null

override def preStart(): Unit = {
import context.dispatcher
timeoutCheckingTask = context.system.scheduler.schedule(0.seconds,
checkTimeoutIntervalMs.milliseconds, self, ExpireDeadHosts)
super.preStart()
}

override def receiveWithLogging = {
case Heartbeat(executorId, taskMetrics, blockManagerId) =>
val response = HeartbeatResponse(
!scheduler.executorHeartbeatReceived(executorId, taskMetrics, blockManagerId))
val unknownExecutor = !scheduler.executorHeartbeatReceived(
executorId, taskMetrics, blockManagerId)
val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
executorLastSeen(executorId) = System.currentTimeMillis()
sender ! response
case ExpireDeadHosts =>
expireDeadHosts()
}

private def expireDeadHosts(): Unit = {
logTrace("Checking for hosts with no recent heartbeats in HeartbeatReceiver.")
val now = System.currentTimeMillis()
for ((executorId, lastSeenMs) <- executorLastSeen) {
if (now - lastSeenMs > executorTimeoutMs) {
logWarning(s"Removing executor $executorId with no recent heartbeats: " +
s"${now - lastSeenMs} ms exceeds timeout $executorTimeoutMs ms")
scheduler.executorLost(executorId, SlaveLost("Executor heartbeat " +
s"timed out after ${now - lastSeenMs} ms"))
if (sc.supportDynamicAllocation) {
sc.killExecutor(executorId)
}
executorLastSeen.remove(executorId)
}
}
}

override def postStop(): Unit = {
if (timeoutCheckingTask != null) {
timeoutCheckingTask.cancel()
}
super.postStop()
}
}
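
The new expireDeadHosts pass scans the executor-to-last-heartbeat map and treats any executor whose last heartbeat is older than the timeout as lost. The policy itself reduces to a pure function over that map; a hedged sketch (illustrative names, not Spark code):

// Illustrative sketch only -- not Spark code. Given last-seen timestamps and a timeout,
// return the executors whose heartbeats have gone quiet for too long.
object HeartbeatExpirySketch {
  def expired(lastSeen: Map[String, Long], nowMs: Long, timeoutMs: Long): List[String] =
    lastSeen.collect { case (id, seenMs) if nowMs - seenMs > timeoutMs => id }.toList

  def main(args: Array[String]): Unit = {
    val lastSeen = Map("exec-1" -> 1000L, "exec-2" -> 119500L)
    // With a 120000 ms timeout and now = 125000 ms, only exec-1 has exceeded the timeout.
    println(expired(lastSeen, nowMs = 125000L, timeoutMs = 120000L))  // List(exec-1)
  }
}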