From 6d874a6d5767a90bf64b8c54d3573f309a71c8cf Mon Sep 17 00:00:00 2001
From: Lianhui Wang
Date: Thu, 16 Jul 2015 16:09:00 +0800
Subject: [PATCH 1/2] support pyspark for yarn-client

---
 python/pyspark/context.py                                    | 5 +++++
 .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index d7466729b8f36..b59fed17eb5f2 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -152,6 +152,11 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize,
         self.master = self._conf.get("spark.master")
         self.appName = self._conf.get("spark.app.name")
         self.sparkHome = self._conf.get("spark.home", None)
+
+        # Let YARN know it's a pyspark app, so it distributes needed libraries.
+        if self.master == "yarn-client":
+            self._conf.set("spark.yarn.isPython","true")
+
         for (k, v) in self._conf.getAll():
             if k.startswith("spark.executorEnv."):
                 varName = k[len("spark.executorEnv."):]
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index f86b6d1e5d7bc..f44fb59b0567a 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -616,7 +616,8 @@ private[spark] class Client(
     val appId = newAppResponse.getApplicationId
     val appStagingDir = getAppStagingDir(appId)
     val pySparkArchives =
-      if (sys.props.getOrElse("spark.yarn.isPython", "false").toBoolean) {
+      if (sys.props.getOrElse("spark.yarn.isPython", "false").toBoolean ||
+          sparkConf.getBoolean("spark.yarn.isPython", false)) {
         findPySparkArchives()
       } else {
         Nil

From cb3f12da607b411a70fca94d9b838619aaac1b14 Mon Sep 17 00:00:00 2001
From: Lianhui Wang
Date: Thu, 16 Jul 2015 16:49:44 +0800
Subject: [PATCH 2/2] add whitespace

---
 python/pyspark/context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index b59fed17eb5f2..43bde5ae41e23 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -155,7 +155,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize,

         # Let YARN know it's a pyspark app, so it distributes needed libraries.
         if self.master == "yarn-client":
-            self._conf.set("spark.yarn.isPython","true")
+            self._conf.set("spark.yarn.isPython", "true")

         for (k, v) in self._conf.getAll():
             if k.startswith("spark.executorEnv."):
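
A minimal usage sketch (not part of the patch), assuming the "yarn-client" master string from the diff above: once SparkContext's _do_init() sets spark.yarn.isPython on the SparkConf, the YARN Client also checks that conf value (not only sys.props) and ships the PySpark archives, so an ordinary PySpark job can run in yarn-client mode. The app name and workload below are illustrative only.

    from pyspark import SparkConf, SparkContext

    # Hypothetical example app; with master "yarn-client", _do_init() sets
    # spark.yarn.isPython=true so YARN distributes the PySpark libraries.
    conf = SparkConf().setMaster("yarn-client").setAppName("pyspark-yarn-demo")
    sc = SparkContext(conf=conf)

    # Python closures now run on YARN executors that received the archives.
    print(sc.parallelize(range(10)).map(lambda x: x * x).sum())

    sc.stop()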