From 8416b7f83c86aa17ed2d7ffdf4a6aea40e48416a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 18 Aug 2015 15:45:46 +0800 Subject: [PATCH] Skip re-computing getMissingParentStages. --- .../scala/org/apache/spark/scheduler/DAGScheduler.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index dadf83a38296d..078676290bdac 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -732,7 +732,8 @@ class DAGScheduler( job.jobId, callSite.shortForm, partitions.length)) logInfo("Final stage: " + finalStage + "(" + finalStage.name + ")") logInfo("Parents of final stage: " + finalStage.parents) - logInfo("Missing parents: " + getMissingParentStages(finalStage)) + val missingStages = getMissingParentStages(finalStage) + logInfo("Missing parents: " + missingStages) val jobSubmissionTime = clock.getTimeMillis() jobIdToActiveJob(jobId) = job activeJobs += job @@ -741,18 +742,18 @@ class DAGScheduler( val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo)) listenerBus.post( SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties)) - submitStage(finalStage) + submitStage(finalStage, Some(missingStages)) } submitWaitingStages() } /** Submits stage, but first recursively submits any missing parents. */ - private def submitStage(stage: Stage) { + private def submitStage(stage: Stage, missingStages: Option[List[Stage]] = None) { val jobId = activeJobForStage(stage) if (jobId.isDefined) { logDebug("submitStage(" + stage + ")") if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) { - val missing = getMissingParentStages(stage).sortBy(_.id) + val missing = missingStages.getOrElse(getMissingParentStages(stage)).sortBy(_.id) logDebug("missing: " + missing) if (missing.isEmpty) { logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")