From 340e07cbc96a31b28eca52d31baf5a337566b15b Mon Sep 17 00:00:00 2001 From: zhoukang Date: Fri, 29 Jun 2018 09:45:16 +0800 Subject: [PATCH 1/2] [SPARK][CORE] NoClassDefFoundError is not caught, which causes the job to hang --- .../scala/org/apache/spark/scheduler/DAGScheduler.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index f74425d73b392..29c8b4a81bcf2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1049,6 +1049,13 @@ class DAGScheduler( abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e)) runningStages -= stage return + + case e: NoClassDefFoundError => + abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e)) + runningStages -= stage + + // Abort execution + return } val tasks: Seq[Task[_]] = try { From 5a409cee39fb1c4f9898cc61f5330a0a2f624bd4 Mon Sep 17 00:00:00 2001 From: zhoukang Date: Wed, 19 Dec 2018 19:52:38 +0800 Subject: [PATCH 2/2] Refine comment --- .../scala/org/apache/spark/scheduler/DAGScheduler.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 29c8b4a81bcf2..6ee15fde5c71f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1045,14 +1045,9 @@ class DAGScheduler( // Abort execution return - case NonFatal(e) => + case e: Throwable => abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e)) runningStages -= stage - return - - case e: NoClassDefFoundError => - abortStage(stage, s"Task serialization failed: 
$e\n${Utils.exceptionString(e)}", Some(e)) - runningStages -= stage // Abort execution return