SPARK-7729: Executor which has been killed should also be displayed on… #6263
@@ -17,26 +17,55 @@
    package org.apache.spark.storage

    import java.util.concurrent.TimeUnit

    import scala.collection.JavaConversions.collectionAsScalaIterable
    import scala.collection.mutable
    import scala.language.reflectiveCalls
Review comment: unused?

Author reply: I am getting this warning on compilation (though there are many already being thrown):

    [warn] core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala:174: reflective access of structural type member method advance should be enabled

That's why I imported it. I checked again; I am still getting this.

Review reply: ok, so that is in the StorageStatusListenerSuite, not StorageStatusListener. You're getting that b/c you're calling …
    import org.apache.spark.SparkConf
    import org.apache.spark.annotation.DeveloperApi
    import org.apache.spark.scheduler._

    import com.google.common.base.Ticker
    import com.google.common.cache.CacheBuilder
Review comment: import ordering -- these go above the o.a.spark imports, with a linebreak in between. See https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide#SparkCodeStyleGuide-Imports
    /**
     * :: DeveloperApi ::
     * A SparkListener that maintains executor storage status.
     *
     * This class is thread-safe (unlike JobProgressListener)
     */
    object StorageStatusListener {
      val TIME_TO_EXPIRE_KILLED_EXECUTOR = "spark.ui.timeToExpireKilledExecutor"
Review comment: do we really need this class? how about just exposing this string to the end user?
    }

    @DeveloperApi
-   class StorageStatusListener extends SparkListener {
+   class StorageStatusListener(conf: SparkConf) extends SparkListener {
      var ticker = Ticker.systemTicker()

      private[storage] def this(conf: SparkConf, ticker: Ticker) = {
        this(conf)
        this.ticker = ticker
      }
Review comment: you can avoid the var with:

    class StorageStatusListener private[storage](conf: SparkConf, ticker: Ticker) {
      def this(conf: SparkConf) = {
        this(conf, Ticker.systemTicker())
      }

Author reply: Yes, this definitely looks better.
      import StorageStatusListener._

      // This maintains only blocks that are cached (i.e. storage level is not StorageLevel.NONE)
      private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]()
      private[storage] val removedExecutorIdToStorageStatus = CacheBuilder.newBuilder().
        expireAfterWrite(conf.getTimeAsSeconds(TIME_TO_EXPIRE_KILLED_EXECUTOR, "0"), TimeUnit.SECONDS).
        ticker(ticker).build[String, StorageStatus]()
Review comment: super nit:

    private[storage] val removedExecutorIdToStorageStatus = CacheBuilder.newBuilder()
      .expireAfterWrite(...)
      .ticker(ticker)
      .build...

Review comment: also, does this need to be private[storage]?

Author reply: I was accessing it in tests, so I kept it private[storage]. I can make it private, and the addition of new tests (if required) can make it private[storage]?
      def storageStatusList: Seq[StorageStatus] = synchronized {
        executorIdToStorageStatus.values.toSeq
      }

      def removedExecutorStorageStatusList: Seq[StorageStatus] = synchronized{
Review comment: nit: space before {

Author reply: Thanks for pointing it out. Corrected.
        removedExecutorIdToStorageStatus.asMap().values().toSeq
      }

      /** Update storage status list to reflect updated block statuses */
      private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) {
        executorIdToStorageStatus.get(execId).foreach { storageStatus =>
@@ -87,6 +116,8 @@ class StorageStatusListener extends SparkListener {
      override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved) {
        synchronized {
          val executorId = blockManagerRemoved.blockManagerId.executorId
          removedExecutorIdToStorageStatus.put(executorId,
            executorIdToStorageStatus.get(executorId).get)
Review comment: I really think it would be nice to keep the time the executor was removed as well from …
          executorIdToStorageStatus.remove(executorId)
        }
      }
@@ -25,6 +25,7 @@ import scala.xml.Node
    import org.apache.spark.status.api.v1.ExecutorSummary
    import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage}
    import org.apache.spark.util.Utils
    import org.apache.spark.storage.StorageStatus
Review comment: nit: ordering
    // This isn't even used anymore -- but we need to keep it b/c of a MiMa false positive
    private[ui] case class ExecutorSummaryInfo(

@@ -41,6 +42,7 @@ private[ui] case class ExecutorSummaryInfo(
        totalInputBytes: Long,
        totalShuffleRead: Long,
        totalShuffleWrite: Long,
        isAlive: Boolean,
        maxMemory: Long,
        executorLogs: Map[String, String])

@@ -49,15 +51,15 @@ private[ui] class ExecutorsPage(
        parent: ExecutorsTab,
        threadDumpEnabled: Boolean)
      extends WebUIPage("") {

      private val listener = parent.listener

      def render(request: HttpServletRequest): Seq[Node] = {
-       val storageStatusList = listener.storageStatusList
+       val storageStatusList = listener.storageStatusList ++ listener.removedExecutorStorageStatusList
        val maxMem = storageStatusList.map(_.maxMem).sum
        val memUsed = storageStatusList.map(_.memUsed).sum
        val diskUsed = storageStatusList.map(_.diskUsed).sum
-       val execInfo = for (statusId <- 0 until storageStatusList.size) yield
-         ExecutorsPage.getExecInfo(listener, statusId)
+       val execInfo = for (statusId <- 0 until storageStatusList.size) yield ExecutorsPage.getExecInfo(listener, statusId)
        val execInfoSorted = execInfo.sortBy(_.id)
        val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty
@@ -84,6 +86,7 @@ private[ui] class ExecutorsPage(
              Shuffle Write
            </span>
          </th>
          <th>Executor Status</th>
          {if (logsExist) <th class="sorttable_nosort">Logs</th> else Seq.empty}
          {if (threadDumpEnabled) <th class="sorttable_nosort">Thread Dump</th> else Seq.empty}
        </thead>
@@ -144,6 +147,9 @@ private[ui] class ExecutorsPage(
        <td sorttable_customkey={info.totalShuffleWrite.toString}>
          {Utils.bytesToString(info.totalShuffleWrite)}
        </td>
        <td sorttable_customkey={info.isAlive.toString}>
          {if(info.isAlive) "Alive" else "Killed"}
        </td>
        {
          if (logsExist) {
            <td>
@@ -177,13 +183,15 @@ private[spark] object ExecutorsPage {
    private[spark] object ExecutorsPage {
      /** Represent an executor's info as a map given a storage status index */
      def getExecInfo(listener: ExecutorsListener, statusId: Int): ExecutorSummary = {
        val status = listener.storageStatusList(statusId)
        val execId = status.blockManagerId.executorId
        val hostPort = status.blockManagerId.hostPort
        val rddBlocks = status.numBlocks
        val memUsed = status.memUsed
        val maxMem = status.maxMem
        val diskUsed = status.diskUsed
        val isAlive = listener.storageStatusList.contains(status)
        val activeTasks = listener.executorToTasksActive.getOrElse(execId, 0)
        val failedTasks = listener.executorToTasksFailed.getOrElse(execId, 0)
        val completedTasks = listener.executorToTasksComplete.getOrElse(execId, 0)
@@ -193,7 +201,7 @@ private[spark] object ExecutorsPage {
        val totalShuffleRead = listener.executorToShuffleRead.getOrElse(execId, 0L)
        val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L)
        val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty)

        new ExecutorSummary(
          execId,
          hostPort,
@@ -208,6 +216,7 @@ private[spark] object ExecutorsPage {
          totalInputBytes,
          totalShuffleRead,
          totalShuffleWrite,
          isAlive,
          maxMem,
          executorLogs
        )
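The page now renders live and removed executors from one combined list, and `isAlive` is computed by checking whether a status is still present in the active list. A condensed Python sketch of that merge-and-label logic (hypothetical function name, simplified data shapes; not the Spark code):

```python
def executor_rows(active_ids, removed_ids):
    """Combine live and removed executor ids and label each row the way
    ExecutorsPage does: "Alive" iff the executor is still in the active
    list, "Killed" otherwise."""
    combined = list(active_ids) + list(removed_ids)
    return [(eid, "Alive" if eid in active_ids else "Killed")
            for eid in combined]
```

This mirrors `listener.storageStatusList ++ listener.removedExecutorStorageStatusList` in `render` and `isAlive = listener.storageStatusList.contains(status)` in `getExecInfo`.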
@@ -17,10 +17,16 @@
    package org.apache.spark.storage

-   import org.scalatest.FunSuite
    import java.util.concurrent.TimeUnit
    import java.util.concurrent.atomic.AtomicLong

    import org.apache.spark.SparkConf
    import org.apache.spark.Success
    import org.apache.spark.executor.TaskMetrics
    import org.apache.spark.scheduler._
+   import org.scalatest.FunSuite
Review comment: spurious extra doc, I don't think you use …
    import com.google.common.base.Ticker
Review comment: nit: import ordering & grouping. org.scalatest & com.google go in the same group, above the org.apache.spark group.

Author reply: Thanks for pointing it out. Corrected.
    /**
     * Test the behavior of StorageStatusListener in response to all relevant events.

@@ -30,9 +36,10 @@ class StorageStatusListenerSuite extends FunSuite {
      private val bm2 = BlockManagerId("fat", "duck", 2)
      private val taskInfo1 = new TaskInfo(0, 0, 0, 0, "big", "dog", TaskLocality.ANY, false)
      private val taskInfo2 = new TaskInfo(0, 0, 0, 0, "fat", "duck", TaskLocality.ANY, false)
      val conf = new SparkConf()

      test("block manager added/removed") {
-       val listener = new StorageStatusListener
+       val listener = new StorageStatusListener(conf)

        // Block manager add
        assert(listener.executorIdToStorageStatus.size === 0)
@@ -61,7 +68,7 @@ class StorageStatusListenerSuite extends FunSuite {
      }

      test("task end without updated blocks") {
-       val listener = new StorageStatusListener
+       val listener = new StorageStatusListener(conf)
        listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
        listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm2, 2000L))
        val taskMetrics = new TaskMetrics
@@ -78,7 +85,7 @@ class StorageStatusListenerSuite extends FunSuite {
      }

      test("task end with updated blocks") {
-       val listener = new StorageStatusListener
+       val listener = new StorageStatusListener(conf)
        listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
        listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm2, 2000L))
        val taskMetrics1 = new TaskMetrics
@@ -127,7 +134,7 @@ class StorageStatusListenerSuite extends FunSuite {
      }

      test("unpersist RDD") {
-       val listener = new StorageStatusListener
+       val listener = new StorageStatusListener(conf)
        listener.onBlockManagerAdded(SparkListenerBlockManagerAdded(1L, bm1, 1000L))
        val taskMetrics1 = new TaskMetrics
        val taskMetrics2 = new TaskMetrics
@@ -150,4 +157,21 @@ class StorageStatusListenerSuite extends FunSuite {
        listener.onUnpersistRDD(SparkListenerUnpersistRDD(1))
        assert(listener.executorIdToStorageStatus("big").numBlocks === 0)
      }

      test("Killed Executor Entry removed after configurable time") {
        val localtestconf = new SparkConf().set(StorageStatusListener.TIME_TO_EXPIRE_KILLED_EXECUTOR,"5s")
Review comment: nit: line too long
        val ticker = new Ticker {
          val nanos = new AtomicLong()
          def advance(time: Long, timeUnit: TimeUnit) = {
            nanos.addAndGet(timeUnit.toNanos(time))
          }
          override def read() = {
            nanos.getAndAdd(0)
          }
        }
        val listener = new StorageStatusListener(localtestconf, ticker)
        listener.removedExecutorIdToStorageStatus.put("1", new StorageStatus(null, 50))
        ticker.advance(5, TimeUnit.SECONDS)
        assert(listener.removedExecutorIdToStorageStatus.asMap.get("1") == null)
Review comment: this is more complicated than it needs to be -- no need for an atomic (there is only one thread here), you can just use a long. Also I'd check the …

    class MyTicker extends Ticker {
      var t = 0L
      override def read(): Long = t
    }
    val ticker = new MyTicker
    val listener = new StorageStatusListener(localtestconf, ticker)
    listener.removedExecutorIdToStorageStatus.put("1", new StorageStatus(null, 50))
    assert(listener.removedExecutorStorageStatusList.nonEmpty)
    ticker.t = 5000000001L
    assert(listener.removedExecutorStorageStatusList.isEmpty)
      }
    }
@@ -22,6 +22,7 @@ import org.apache.spark.Success
    import org.apache.spark.executor.TaskMetrics
    import org.apache.spark.scheduler._
    import org.apache.spark.storage._
    import org.apache.spark.SparkConf
Review comment: nit: import ordering, should go above the rest of the spark imports (b/c class imports sort before package imports). Also, while you're touching this, there should be a blank line between the scalatest and spark imports.
    /**
     * Test various functionality in the StorageListener that supports the StorageTab.

@@ -43,7 +44,7 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter {

      before {
        bus = new LiveListenerBus
-       storageStatusListener = new StorageStatusListener
+       storageStatusListener = new StorageStatusListener(new SparkConf())
        storageListener = new StorageListener(storageStatusListener)
        bus.addListener(storageStatusListener)
        bus.addListener(storageListener)
Review comment: avoid using JavaConversions; you should prefer JavaConverters, which forces you to call .asScala, making the transformation much clearer to future code readers. The convention is to import scala.collection.JavaConverters._

Author reply: Sure.