From 4a48bf02706e703fca6a4047f9b68ba67f816706 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Mon, 19 Oct 2015 22:58:11 -0700 Subject: [PATCH 1/4] [SPARK-11201] [YARN] Make sure SPARK_YARN_MODE is set before SparkHadoopUtil.get is called. If `StreamingContext.getOrCreate` is used in `yarn-client` mode, a `ClassCastException` gets thrown when `Client.scala` is initialized and `YarnSparkHadoopUtil.get` is called. This fails because the `SparkHadoopUtil.get` method is called in the `getOrCreate` method default argument, before SPARK_YARN_MODE is set. --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index ad92f5635af35..9067218d910be 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -264,6 +264,7 @@ object SparkSubmit { "Could not load YARN classes. " + "This copy of Spark may not have been compiled with YARN support.") } + System.setProperty("SPARK_YARN_MODE", "true") } // Update args.deployMode if it is null. It will be passed down as a Spark property later. From fbd4b8d5ec17b881541a52c9f1876b8d978ddc21 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Mon, 19 Oct 2015 23:28:11 -0700 Subject: [PATCH 2/4] Remove the multiple other places where this is set. --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 -- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 3 --- 2 files changed, 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index ccba3ed9e643c..8c31485ab54e3 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -430,8 +430,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _conf.set("spark.externalBlockStore.folderName", externalBlockStoreFolderName) - if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true") - // "_jobProgressListener" should be set up before creating SparkEnv because when creating // "SparkEnv", some messages will be posted to "listenerBus" and we should not miss them. _jobProgressListener = new JobProgressListener(_conf) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 08aecfa7f6fe0..51584463c18be 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1025,9 +1025,6 @@ object Client extends Logging { "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"") } - // Set an env variable indicating we are running in YARN mode. - // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes - System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf val args = new ClientArguments(argStrings, sparkConf) From 36a88265f35028107fc87761e10825a8fa3616dc Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Tue, 20 Oct 2015 09:40:42 -0700 Subject: [PATCH 3/4] Revert "Remove the multiple other places where this is set." This reverts commit fbd4b8d5ec17b881541a52c9f1876b8d978ddc21. --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 ++ yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 3 +++ 2 files changed, 5 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 8c31485ab54e3..ccba3ed9e643c 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -430,6 +430,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _conf.set("spark.externalBlockStore.folderName", externalBlockStoreFolderName) + if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true") + // "_jobProgressListener" should be set up before creating SparkEnv because when creating // "SparkEnv", some messages will be posted to "listenerBus" and we should not miss them. _jobProgressListener = new JobProgressListener(_conf) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 51584463c18be..08aecfa7f6fe0 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1025,6 +1025,9 @@ object Client extends Logging { "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"") } + // Set an env variable indicating we are running in YARN mode. + // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes + System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf val args = new ClientArguments(argStrings, sparkConf) From 92aca7d0a3e956332c9adaccf597f289ff21bb98 Mon Sep 17 00:00:00 2001 From: Hari Shreedharan Date: Tue, 20 Oct 2015 09:42:04 -0700 Subject: [PATCH 4/4] Add comment explaining why the setProperty is left in the other places. --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 ++ yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 ++ 2 files changed, 4 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index ccba3ed9e643c..149fd52786da1 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -430,6 +430,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _conf.set("spark.externalBlockStore.folderName", externalBlockStoreFolderName) + // This should have been set by spark-submit, but there are code paths that may not go + // through spark-submit. if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true") // "_jobProgressListener" should be set up before creating SparkEnv because when creating diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 08aecfa7f6fe0..c1a9c74cb7a40 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1027,6 +1027,8 @@ object Client extends Logging { // Set an env variable indicating we are running in YARN mode. // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes + // This should have been set by spark-submit, but there are code paths that may not go + // through spark-submit. System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf