From 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36 Mon Sep 17 00:00:00 2001 From: pgandhi Date: Mon, 28 Aug 2017 08:51:22 -0500 Subject: [PATCH] [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server History Server launch uses SparkClassCommandBuilder for launching the server. It is observed that SPARK_CLASSPATH has been deprecated and removed. For spark-submit this takes a different route, and spark.driver.extraClassPath takes care of specifying additional jars in the classpath that were previously specified in SPARK_CLASSPATH. Right now the only way to specify the additional jars for launching daemons such as the history server is using SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html), but this, I presume, is meant to be a distribution classpath. It would be nice to have a config similar to spark.driver.extraClassPath for launching daemons such as the history server. Added new environment variable SPARK_DAEMON_CLASSPATH to set the classpath for launching daemons. Tested and verified for History Server and Standalone Mode. ## How was this patch tested? Initially, the history server start script would fail because it could not find the required jars for launching the server in the Java classpath. The same was true for running Master and Worker in standalone mode. By adding the environment variable SPARK_DAEMON_CLASSPATH to the Java classpath, both kinds of daemons (History Server, standalone daemons) start up and run. Author: pgandhi Author: pgandhi999 Closes #19047 from pgandhi999/master. 
--- conf/spark-env.sh.template | 1 + docs/monitoring.md | 4 ++++ docs/running-on-mesos.md | 2 ++ docs/spark-standalone.md | 4 ++++ .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 +++++ 5 files changed, 16 insertions(+) diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index 1663019ee5758..f8c895f5303b9 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -52,6 +52,7 @@ # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers # Generic options for the daemons used in the standalone deploy mode diff --git a/docs/monitoring.md b/docs/monitoring.md index 3e577c5f36778..d22cd945eaf61 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -61,6 +61,10 @@ The history server can be configured as follows: SPARK_DAEMON_JAVA_OPTS JVM options for the history server (default: none). + + SPARK_DAEMON_CLASSPATH + Classpath for the history server (default: none). + SPARK_PUBLIC_DNS diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 0e5a20c578db3..c12b8580af063 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -160,6 +160,8 @@ If you like to run the `MesosClusterDispatcher` with Marathon, you need to run t The `MesosClusterDispatcher` also supports writing recovery state into Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover all submitted and running containers on relaunch. In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations. 
For more information about these configurations please refer to the configurations [doc](configurations.html#deploy). +You can also specify any additional jars required by the `MesosClusterDispatcher` in the classpath by setting the environment variable SPARK_DAEMON_CLASSPATH in spark-env. + From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master URL to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You can view driver statuses on the Spark cluster Web UI. diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 642575b46dd42..1095386c31ab8 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -149,6 +149,10 @@ You can optionally configure the cluster further by setting environment variable SPARK_DAEMON_JAVA_OPTS JVM options for the Spark master and worker daemons themselves in the form "-Dx=y" (default: none). + + SPARK_DAEMON_CLASSPATH + Classpath for the Spark master and worker daemons themselves (default: none). + SPARK_PUBLIC_DNS The public DNS name of the Spark master and workers (default: none). 
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java index 137ef74843da5..32724acdc362c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java @@ -53,16 +53,19 @@ public List buildCommand(Map env) case "org.apache.spark.deploy.master.Master": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_MASTER_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.deploy.worker.Worker": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_WORKER_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.deploy.history.HistoryServer": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_HISTORY_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.executor.CoarseGrainedExecutorBackend": @@ -77,11 +80,13 @@ public List buildCommand(Map env) break; case "org.apache.spark.deploy.mesos.MesosClusterDispatcher": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); break; case "org.apache.spark.deploy.ExternalShuffleService": case "org.apache.spark.deploy.mesos.MesosExternalShuffleService": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_SHUFFLE_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; default: