From 9252904ab084a8aadd7ca17b37f5ace257e91e1e Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Sat, 30 Apr 2016 07:47:48 +0800
Subject: [PATCH] new accumulator should be tolerant of local RPC message delivery

---
 .../org/apache/spark/scheduler/TaskSchedulerImpl.scala | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index 776a3226cc78d..8fa4aa121c123 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -389,9 +389,14 @@ private[spark] class TaskSchedulerImpl(
     // (taskId, stageId, stageAttemptId, accumUpdates)
     val accumUpdatesWithTaskIds: Array[(Long, Int, Int, Seq[AccumulableInfo])] = synchronized {
       accumUpdates.flatMap { case (id, updates) =>
+        // We should call `acc.value` here as we are at the driver side now. However, the RPC
+        // framework optimizes local message delivery so that messages do not need to be
+        // serialized and deserialized. This brings trouble to the accumulator framework, which
+        // depends on serialization to set the `atDriverSide` flag. Here we call `acc.localValue`
+        // instead to be more robust about this issue.
+        val accInfos = updates.map(acc => acc.toInfo(Some(acc.localValue), None))
         taskIdToTaskSetManager.get(id).map { taskSetMgr =>
-          (id, taskSetMgr.stageId, taskSetMgr.taskSet.stageAttemptId,
-            updates.map(acc => acc.toInfo(Some(acc.value), None)))
+          (id, taskSetMgr.stageId, taskSetMgr.taskSet.stageAttemptId, accInfos)
         }
       }
     }
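
Note: for readers unfamiliar with how the accumulator framework tracks which side it is on, the following is a minimal, self-contained sketch of the failure mode the patch comment describes. It is not the actual Spark code: `SketchAccumulator`, `initAtDriverSide`, and `LocalDeliveryDemo` are made-up names, and in the real framework the flag is flipped by serialization hooks rather than a constructor argument.

    // Illustrative sketch only: a simplified stand-in for Spark's accumulator classes,
    // not the real org.apache.spark API. The names below merely mirror the
    // `atDriverSide` flag and the `value`/`localValue` accessors mentioned in the patch.
    class SketchAccumulator(initAtDriverSide: Boolean) extends Serializable {
      // In the real framework this flag is flipped when the accumulator is serialized
      // on one side and deserialized on the other. Here its initial state is taken
      // from the constructor to simulate the two scenarios.
      private var atDriverSide: Boolean = initAtDriverSide
      private var sum: Long = 0L

      def add(v: Long): Unit = { sum += v }

      // Driver-only accessor, guarded by the serialization-managed flag.
      def value: Long = {
        assert(atDriverSide, "value can only be read at the driver side")
        sum
      }

      // Unguarded accessor: returns the current value no matter what the flag says.
      def localValue: Long = sum
    }

    object LocalDeliveryDemo {
      def main(args: Array[String]): Unit = {
        // With local RPC delivery the status-update message is handed over in-process,
        // so the accumulator never goes through deserialization on its way back to the
        // driver and the flag still claims "executor side".
        val accFromLocalTask = new SketchAccumulator(initAtDriverSide = false)
        accFromLocalTask.add(42)

        // accFromLocalTask.value would fail the assertion here even though this code
        // runs on the driver; localValue is robust to the skipped serialization.
        println(accFromLocalTask.localValue)  // prints 42
      }
    }

In this sketch, reading `value` on an accumulator that arrived via local delivery throws, while `localValue` returns the update regardless, which is the behavior the patch relies on in `TaskSchedulerImpl`.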