-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-32736][CORE] Avoid caching the removed decommissioned executors in TaskSchedulerImpl #29579
Changes from 5 commits
d5bc756
404f92b
47da0d7
3152e9b
a0bc4f6
75a14a6
b6490fc
bea465b
c12c82d
90f1fd1
84df735
0c0749e
6e8b57e
a39ba8e
ff02621
9096cb9
58add67
d246840
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -308,7 +308,7 @@ private[deploy] class Master( | |
appInfo.resetRetryCount() | ||
} | ||
|
||
exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, false)) | ||
exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, None)) | ||
|
||
if (ExecutorState.isFinished(state)) { | ||
// Remove this executor from the worker and app | ||
|
@@ -909,9 +909,9 @@ private[deploy] class Master( | |
exec.application.driver.send(ExecutorUpdated( | ||
exec.id, ExecutorState.DECOMMISSIONED, | ||
Some("worker decommissioned"), None, | ||
// workerLost is being set to true here to let the driver know that the host (aka. worker) | ||
// worker host is being set here to let the driver know that the host (aka. worker) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can you reword the comment to be more accurate now :-) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated a little bit. |
||
// is also being decommissioned. | ||
workerLost = true)) | ||
Some(worker.host))) | ||
exec.state = ExecutorState.DECOMMISSIONED | ||
exec.application.removeExecutor(exec) | ||
} | ||
|
@@ -932,7 +932,7 @@ private[deploy] class Master( | |
for (exec <- worker.executors.values) { | ||
logInfo("Telling app of lost executor: " + exec.id) | ||
exec.application.driver.send(ExecutorUpdated( | ||
exec.id, ExecutorState.LOST, Some("worker lost"), None, workerLost = true)) | ||
exec.id, ExecutorState.LOST, Some("worker lost"), None, Some(worker.host))) | ||
exec.state = ExecutorState.LOST | ||
exec.application.removeExecutor(exec) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1826,7 +1826,7 @@ private[spark] class DAGScheduler( | |
val externalShuffleServiceEnabled = env.blockManager.externalShuffleServiceEnabled | ||
val isHostDecommissioned = taskScheduler | ||
.getExecutorDecommissionState(bmAddress.executorId) | ||
.exists(_.isHostDecommissioned) | ||
.exists(_.hostOpt.isDefined) | ||
|
||
// Shuffle output of all executors on host `bmAddress.host` may be lost if: | ||
// - External shuffle service is enabled, so we assume that all shuffle data on node is | ||
|
@@ -1989,15 +1989,15 @@ private[spark] class DAGScheduler( | |
*/ | ||
private[scheduler] def handleExecutorLost( | ||
execId: String, | ||
workerLost: Boolean): Unit = { | ||
hostOpt: Option[String]): Unit = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you change this method's comment also if you decide to go with hostOpt instead of workerLost (perhaps you ought to consider my comment on making workerLost itself be an Option[String]). The comment still refers to "standalone worker" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I changed it to |
||
// if the cluster manager explicitly tells us that the entire worker was lost, then | ||
// we know to unregister shuffle output. (Note that "worker" specifically refers to the process | ||
// from a Standalone cluster, where the shuffle service lives in the Worker.) | ||
val fileLost = workerLost || !env.blockManager.externalShuffleServiceEnabled | ||
val fileLost = hostOpt.isDefined || !env.blockManager.externalShuffleServiceEnabled | ||
removeExecutorAndUnregisterOutputs( | ||
execId = execId, | ||
fileLost = fileLost, | ||
hostToUnregisterOutputs = None, | ||
hostToUnregisterOutputs = hostOpt, | ||
maybeEpoch = None) | ||
} | ||
|
||
|
@@ -2366,11 +2366,12 @@ private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler | |
dagScheduler.handleExecutorAdded(execId, host) | ||
|
||
case ExecutorLost(execId, reason) => | ||
val workerLost = reason match { | ||
case ExecutorProcessLost(_, true, _) => true | ||
case _ => false | ||
val hostOpt = reason match { | ||
case ExecutorProcessLost(_, host, _) => host | ||
case ExecutorDecommission(host) => host | ||
Ngone51 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
case _ => None | ||
} | ||
dagScheduler.handleExecutorLost(execId, workerLost) | ||
dagScheduler.handleExecutorLost(execId, hostOpt) | ||
|
||
case WorkerRemoved(workerId, host, message) => | ||
dagScheduler.handleWorkerRemoved(workerId, host, message) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,14 +53,14 @@ private [spark] object LossReasonPending extends ExecutorLossReason("Pending los | |
|
||
/** | ||
* @param _message human readable loss reason | ||
* @param workerLost whether the worker is confirmed lost too (i.e. including shuffle service) | ||
* @param hostOpt it's defined when the host is confirmed lost too (i.e. including shuffle service) | ||
* @param causedByApp whether the loss of the executor is the fault of the running app. | ||
* (assumed true by default unless known explicitly otherwise) | ||
*/ | ||
private[spark] | ||
case class ExecutorProcessLost( | ||
_message: String = "Executor Process Lost", | ||
workerLost: Boolean = false, | ||
hostOpt: Option[String] = None, | ||
causedByApp: Boolean = true) | ||
extends ExecutorLossReason(_message) | ||
|
||
|
@@ -69,5 +69,8 @@ case class ExecutorProcessLost( | |
* | ||
* This is used by the task scheduler to remove state associated with the executor, but | ||
* not yet fail any tasks that were running in the executor before the executor is "fully" lost. | ||
* | ||
* @param hostOpt it will be set by [[TaskSchedulerImpl]] when the host is decommissioned too | ||
*/ | ||
private [spark] object ExecutorDecommission extends ExecutorLossReason("Executor decommission.") | ||
private [spark] case class ExecutorDecommission(var hostOpt: Option[String] = None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not a fan of this change of making the hostOpt be a var instead of a val. I think you only need this for line 932 in TaskSchedulerImpl. I am sure you would be able to accommodate that use case in a different way. The reason I don't like it is because other ExecutorLossReason's are "messages" (for example ExecutorProcessLost) and these messages tend to be immutable. I think it's a bit hacky to have ExecutorDecommission masquerading as a message but then make it be mutable. Even ExecutorDecommission is a message that the TaskSchedulerImpl enqueues into the event loop of the DAGScheduler. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TBH I don't like the way myself too. I tried another way to get rid of the problem here but requires storing the redundant |
||
extends ExecutorLossReason("Executor decommission.") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -390,16 +390,18 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
case Some(executorInfo) => | ||
// This must be synchronized because variables mutated | ||
// in this block are read when requesting executors | ||
val killed = CoarseGrainedSchedulerBackend.this.synchronized { | ||
val lossReason = CoarseGrainedSchedulerBackend.this.synchronized { | ||
addressToExecutorId -= executorInfo.executorAddress | ||
executorDataMap -= executorId | ||
executorsPendingLossReason -= executorId | ||
executorsPendingDecommission -= executorId | ||
executorsPendingToRemove.remove(executorId).getOrElse(false) | ||
val decommissioned = executorsPendingDecommission.remove(executorId) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rename decommissioned to workerHostOpt and perhaps give it an explicit type: Option[Option[String]]. It's no longer a simple boolean. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rename to |
||
executorsPendingToRemove | ||
.remove(executorId).filter(killedByDriver => killedByDriver).map(_ => ExecutorKilled) | ||
Ngone51 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.getOrElse(if (decommissioned) ExecutorDecommission() else reason) | ||
} | ||
totalCoreCount.addAndGet(-executorInfo.totalCores) | ||
totalRegisteredExecutors.addAndGet(-1) | ||
scheduler.executorLost(executorId, if (killed) ExecutorKilled else reason) | ||
scheduler.executorLost(executorId, lossReason) | ||
listenerBus.post( | ||
SparkListenerExecutorRemoved(System.currentTimeMillis(), executorId, reason.toString)) | ||
case None => | ||
|
@@ -489,17 +491,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
decomInfo: ExecutorDecommissionInfo): Boolean = { | ||
|
||
logInfo(s"Asking executor $executorId to decommissioning.") | ||
try { | ||
scheduler.executorDecommission(executorId, decomInfo) | ||
if (driverEndpoint != null) { | ||
logInfo("Propagating executor decommission to driver.") | ||
driverEndpoint.send(DecommissionExecutor(executorId, decomInfo)) | ||
} | ||
} catch { | ||
case e: Exception => | ||
logError(s"Unexpected error during decommissioning ${e.toString}", e) | ||
return false | ||
} | ||
scheduler.executorDecommission(executorId, decomInfo) | ||
Ngone51 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Send decommission message to the executor (it could have originated on the executor | ||
Ngone51 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// but not necessarily. | ||
CoarseGrainedSchedulerBackend.this.synchronized { | ||
|
@@ -656,7 +648,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp | |
!executorsPendingToRemove.contains(id) && | ||
!executorsPendingLossReason.contains(id) && | ||
!executorsPendingDecommission.contains(id) | ||
|
||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we need a better name than hostOpt ? How about just "Hostname" ? The type already conveys that this is an optional.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about hostLost?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can also leave the name as workerLost and just make it be an Optional[String] ? In the spirit of minimal code change ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After a second thought, I changed it to
workerHost
. We need the keyword `worker`
because it's specific to the Standalone Worker. And `Host`
gives the direct meaning of the value. And `workerLost`
sounds more appropriate for the `Boolean`
type. WDYT?