From f41698e330c517830a90309a022b072ea6406dcb Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 19 Nov 2017 14:10:19 +0900
Subject: [PATCH] Add a config to control if PySpark should use daemon or not

---
 .../org/apache/spark/api/python/PythonWorkerFactory.scala | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index fc595ae9e4563..f53c6178047f5 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -38,7 +38,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
   // (pyspark/daemon.py) and tell it to fork new workers for our tasks. This daemon currently
   // only works on UNIX-based systems now because it uses signals for child management, so we can
   // also fall back to launching workers (pyspark/worker.py) directly.
-  val useDaemon = !System.getProperty("os.name").startsWith("Windows")
+  val useDaemon = {
+    val useDaemonEnabled = SparkEnv.get.conf.getBoolean("spark.python.use.daemon", true)
+
+    // This flag is ignored on Windows as it's unable to fork.
+    !System.getProperty("os.name").startsWith("Windows") && useDaemonEnabled
+  }
   var daemon: Process = null
   val daemonHost = InetAddress.getByAddress(Array(127, 0, 0, 1))
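
For reference, a minimal sketch of how the new flag could be toggled from driver-side
application code, assuming a standard SparkConf setup. Only the config key
spark.python.use.daemon comes from this patch; the app name and the rest of the
snippet are illustrative:

    import org.apache.spark.{SparkConf, SparkContext}

    // Illustrative sketch: disable the PySpark daemon so executors launch
    // pyspark/worker.py processes directly instead of forking them from
    // pyspark/daemon.py.
    val conf = new SparkConf()
      .setAppName("python-worker-no-daemon")  // hypothetical app name
      .set("spark.python.use.daemon", "false")
    val sc = new SparkContext(conf)

The same toggle can be passed on the command line with
--conf spark.python.use.daemon=false. When the key is unset, getBoolean falls back
to the default of true, and on Windows the daemon stays disabled regardless of the
setting, as noted in the added comment.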