apache · suyanNone · Jun 4, 2015 · Jun 5, 2015 · Jul 28, 2015 · squito
diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala
@@ -60,7 +60,9 @@ class ExecutorSummary private[spark](
     val totalShuffleRead: Long,
     val totalShuffleWrite: Long,
     val maxMemory: Long,
-    val executorLogs: Map[String, String])
+    val executorLogs: Map[String, String],
+    // Since 1.5.0
+    val isRemoved: Boolean)
 
 class JobData private[spark](
     val jobId: Int,

diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala
@@ -87,7 +87,9 @@ class StorageStatusListener extends SparkListener {
   override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved) {
     synchronized {
       val executorId = blockManagerRemoved.blockManagerId.executorId
-      executorIdToStorageStatus.remove(executorId)
+      // Keep the entry and only flag it as removed, so lost executors stay visible in the UI.
+      // Look up with `get` so an unknown executor ID is ignored rather than throwing.
+      executorIdToStorageStatus.get(executorId).foreach(_.markAsRemoved())
     }
   }
 

diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala
@@ -57,6 +57,11 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) {
   private val _rddStorageInfo = new mutable.HashMap[Int, (Long, Long, Long, StorageLevel)]
   private var _nonRddStorageInfo: (Long, Long, Long) = (0L, 0L, 0L)
 
+  private var _blockManagerRemoved = false
+
+  def isRemoved: Boolean = _blockManagerRemoved
+  def markAsRemoved(): Unit = _blockManagerRemoved = true
+
   /** Create a storage status with an initial set of blocks, leaving the source unmodified. */
   def this(bmid: BlockManagerId, maxMem: Long, initialBlocks: Map[BlockId, BlockStatus]) {
     this(bmid, maxMem)

diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
@@ -42,7 +42,8 @@ private[ui] case class ExecutorSummaryInfo(
     totalShuffleRead: Long,
     totalShuffleWrite: Long,
     maxMemory: Long,
-    executorLogs: Map[String, String])
+    executorLogs: Map[String, String],
+    isRemoved: Boolean)
 
 
 private[ui] class ExecutorsPage(
@@ -53,9 +54,11 @@ private[ui] class ExecutorsPage(
 
   def render(request: HttpServletRequest): Seq[Node] = {
     val storageStatusList = listener.storageStatusList
-    val maxMem = storageStatusList.map(_.maxMem).sum
-    val memUsed = storageStatusList.map(_.memUsed).sum
-    val diskUsed = storageStatusList.map(_.diskUsed).sum
+    val activeStorageStatusList = storageStatusList.filter(!_.isRemoved)
+    val maxMem = activeStorageStatusList.map(_.maxMem).sum
+    val memUsed = activeStorageStatusList.map(_.memUsed).sum
+    val diskUsed = activeStorageStatusList.map(_.diskUsed).sum
+    val activeExecSize = activeStorageStatusList.size
     val execInfo = for (statusId <- 0 until storageStatusList.size) yield
       ExecutorsPage.getExecInfo(listener, statusId)
     val execInfoSorted = execInfo.sortBy(_.id)
@@ -85,7 +88,7 @@ private[ui] class ExecutorsPage(
             </span>
           </th>
           {if (logsExist) <th class="sorttable_nosort">Logs</th> else Seq.empty}
-          {if (threadDumpEnabled) <th class="sorttable_nosort">Thread Dump</th> else Seq.empty}
+          <th class="sorttable_nosort">Status</th>
         </thead>
         <tbody>
           {execInfoSorted.map(execRow(_, logsExist))}
@@ -109,7 +112,8 @@ private[ui] class ExecutorsPage(
         </div>
       </div>;
 
-    UIUtils.headerSparkPage("Executors (" + execInfo.size + ")", content, parent)
+    UIUtils.headerSparkPage("Executors (Active: " + activeExecSize +
+      ", Total Allocated: " + execInfo.size + ")", content, parent)
   }
 
   /** Render an HTML row representing an executor */
@@ -160,13 +164,15 @@ private[ui] class ExecutorsPage(
         }
       }
       {
-        if (threadDumpEnabled) {
+        if (info.isRemoved) {
+          <td>Removed</td>
+        } else if (threadDumpEnabled) {
           val encodedId = URLEncoder.encode(info.id, "UTF-8")
           <td>
             <a href={s"threadDump/?executorId=${encodedId}"}>Thread Dump</a>
           </td>
         } else {
-          Seq.empty
+          <td></td>
         }
       }
     </tr>
@@ -193,6 +199,7 @@ private[spark] object ExecutorsPage {
     val totalShuffleRead = listener.executorToShuffleRead.getOrElse(execId, 0L)
     val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L)
     val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty)
+    val isRemoved = status.isRemoved
 
     new ExecutorSummary(
       execId,
@@ -209,7 +216,8 @@ private[spark] object ExecutorsPage {
       totalShuffleRead,
       totalShuffleWrite,
       maxMem,
-      executorLogs
+      executorLogs,
+      isRemoved
     )
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala
@@ -43,7 +43,8 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Bloc
 
   private[ui] val _rddInfoMap = mutable.Map[Int, RDDInfo]() // exposed for testing
 
-  def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList
+  def storageStatusList: Seq[StorageStatus] =
+    storageStatusListener.storageStatusList.filter(!_.isRemoved)
 
   /** Filter RDD info to include only those with cached partitions */
   def rddInfoList: Seq[RDDInfo] = synchronized {

diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json
@@ -13,5 +13,6 @@
   "totalShuffleRead" : 0,
   "totalShuffleWrite" : 13180,
   "maxMemory" : 278302556,
-  "executorLogs" : { }
+  "executorLogs" : { },
+  "isRemoved" : false
 } ]
diff --git a/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/storage/StorageStatusListenerSuite.scala
@@ -50,13 +50,15 @@ class StorageStatusListenerSuite extends SparkFunSuite {
 
     // Block manager remove
     listener.onBlockManagerRemoved(SparkListenerBlockManagerRemoved(1L, bm1))
-    assert(listener.executorIdToStorageStatus.size === 1)
-    assert(!listener.executorIdToStorageStatus.get("big").isDefined)
-    assert(listener.executorIdToStorageStatus.get("fat").isDefined)
+    assert(listener.executorIdToStorageStatus.size === 2)
+    assert(listener.storageStatusList.count(!_.isRemoved) == 1)
+    assert(listener.executorIdToStorageStatus.get("big").get.isRemoved)
+    assert(!listener.executorIdToStorageStatus.get("fat").get.isRemoved)
     listener.onBlockManagerRemoved(SparkListenerBlockManagerRemoved(1L, bm2))
-    assert(listener.executorIdToStorageStatus.size === 0)
-    assert(!listener.executorIdToStorageStatus.get("big").isDefined)
-    assert(!listener.executorIdToStorageStatus.get("fat").isDefined)
+    assert(listener.executorIdToStorageStatus.size === 2)
+    assert(listener.storageStatusList.count(!_.isRemoved) == 0)
+    assert(listener.executorIdToStorageStatus.get("big").get.isRemoved)
+    assert(listener.executorIdToStorageStatus.get("fat").get.isRemoved)
   }
 
   test("task end without updated blocks") {

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
@@ -151,6 +151,10 @@ object MimaExcludes {
             ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.PartitionSpec$"),
             ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DescribeCommand"),
             ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.sources.DDLException")
+          ) ++ Seq(
+            // SPARK-8100: Make it possible to view lost executor info in the Spark UI
+            ProblemFilters.exclude[MissingMethodProblem](
+              "org.apache.spark.status.api.v1.ExecutorSummary.this")
           )
 
         case v if v.startsWith("1.4") =>