
[SPARK-3584] sbin/slaves doesn't work when we use password authentication for SSH #2444

Closed
wants to merge 8 commits
1 change: 1 addition & 0 deletions .gitignore
@@ -23,6 +23,7 @@ conf/*.cmd
conf/*.properties
conf/*.conf
conf/*.xml
+conf/slaves
Contributor

For this one I think we intentionally have a simple slaves file in the repo so that people can start a local cluster by default.

Member Author

So, this file will not be edited? Users should specify another slave list file via the SPARK_SLAVES variable, right?

Contributor

Okay, this is fine actually, given that we preserve the "default" behavior (of starting at localhost) due to your edits below.

docs/_site
docs/api
target/
1 change: 1 addition & 0 deletions .rat-excludes
@@ -19,6 +19,7 @@ log4j.properties
log4j.properties.template
metrics.properties.template
slaves
+slaves.template
spark-env.sh
spark-env.cmd
spark-env.sh.template
File renamed without changes.
2 changes: 1 addition & 1 deletion docs/spark-standalone.md
@@ -62,7 +62,7 @@ Finally, the following configuration options can be passed to the master and worker:

# Cluster Launch Scripts

-To launch a Spark standalone cluster with the launch scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. The master machine must be able to access each of the slave machines via password-less `ssh` (using a private key). For testing, you can just put `localhost` in this file.
+To launch a Spark standalone cluster with the launch scripts, you need to create a file called `conf/slaves` in your Spark directory, which should contain the hostnames of all the machines where you would like to start Spark workers, one per line. If `conf/slaves` does not exist, the launch scripts fall back to a list containing the single hostname `localhost`, which is useful for testing. The master machine must be able to access each of the slave machines via `ssh`. By default, `ssh` is executed in the background so that the slave machines are reached in parallel. If you would like to use password authentication for `ssh` instead of password-less authentication (using a private key), `ssh` does not work well in the background. To avoid this, you can set the environment variable `SPARK_SSH_FOREGROUND` to `yes` or `y` to execute `ssh` in the foreground.

Contributor

what about -

To launch a Spark standalone cluster with the launch scripts, you should create a file called conf/slaves in your Spark directory, which must contain the hostnames of all the machines where you intend to start Spark workers, one per line. If conf/slaves does not exist, the launch scripts default to a single machine (localhost), which is useful for testing. Note, the master machine accesses each of the worker machines via ssh. By default, ssh is run in parallel and requires password-less (using a private key) access to be set up. If you do not have a password-less setup, you can set the environment variable SPARK_SSH_FOREGROUND and serially provide a password for each worker.

Member Author

@mattf Thank you for reviewing. It makes sense.

Once you've set up this file, you can launch or stop your cluster with the following shell scripts, based on Hadoop's deploy scripts, and available in `SPARK_HOME/sbin`:
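
As a rough illustration, a minimal setup might look like the following (the worker hostnames are hypothetical; `sbin/start-slaves.sh` is one of the launch scripts referenced above):

$ cat conf/slaves
worker1.example.com
worker2.example.com

# With password authentication, forcing ssh into the foreground makes each
# password prompt appear in turn instead of being swallowed by a background job:
$ SPARK_SSH_FOREGROUND=yes sbin/start-slaves.sh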

31 changes: 22 additions & 9 deletions sbin/slaves.sh
@@ -44,7 +44,9 @@ sbin="`cd "$sbin"; pwd`"
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# spark-env.sh. Save it here.
-HOSTLIST="$SPARK_SLAVES"
+if [ -f "$SPARK_SLAVES" ]; then
+  HOSTLIST=`cat "$SPARK_SLAVES"`
+fi

# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.
@@ -67,23 +69,34 @@ fi

if [ "$HOSTLIST" = "" ]; then
if [ "$SPARK_SLAVES" = "" ]; then
export HOSTLIST="${SPARK_CONF_DIR}/slaves"
if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
else
HOSTLIST=localhost
Contributor

We should change the docs in spark-standalone.md to explain two new features:

  1. You can set SSH_FOREGROUND if you cannot use passwordless SSH (currently, the docs say passwordless SSH is required).
  2. If there is no slaves file in existence, it will launch a single slave at localhost by default.

Contributor

yes, i was moving too quickly this morning. definitely need something to allow for background ssh.

Member Author

O.K., I'll add an SSH_FOREGROUND variable and add a description.
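
A quick hypothetical check of the localhost fallback (assuming sshd is running locally): with no conf/slaves file present, slaves.sh should run the given command on localhost only, with output prefixed per the sed expression further down:

$ sbin/slaves.sh echo hello
localhost: hello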

+    fi
  else
-    export HOSTLIST="${SPARK_SLAVES}"
+    HOSTLIST=`cat "${SPARK_SLAVES}"`
Contributor

why cat here and echo later?

Member Author

This is to use HOSTLIST as the list of hosts itself, not as a file name.
That makes it possible to fall back to `localhost` as a default host list entry.

Contributor

thanks for pointing that out. i didn't read closely enough.
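
A small sketch of the distinction under discussion (the file path is hypothetical): cat reads a file by name, whereas HOSTLIST now holds the hostnames themselves, so the loop re-emits its contents with echo:

HOSTLIST=`cat conf/slaves`      # read the file once; HOSTLIST now holds hostnames
for slave in `echo "$HOSTLIST" | sed "s/#.*$//;/^$/d"`; do  # strip comments and blanks
  echo "$slave"
done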

  fi
fi



# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

-for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
-  ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
-    2>&1 | sed "s/^/$slave: /" &
-  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
-    sleep $SPARK_SLAVE_SLEEP
-  fi
+for slave in `echo "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
+  if [ "${SPARK_SSH_FOREGROUND}" = "y" ] || [ "${SPARK_SSH_FOREGROUND}" = "yes" ]; then
Contributor

Typically for these types of options we just check whether it's defined or not. For example elsewhere we do:

if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then

Can you make it consistent with that?
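
A sketch of the suggested style; this is the reviewer's proposal rather than the code committed in this revision (`-n` tests that the variable is set to a non-empty value):

if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
  ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
    2>&1 | sed "s/^/$slave: /"
else
  ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
    2>&1 | sed "s/^/$slave: /" &
fi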

+    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+      2>&1 | sed "s/^/$slave: /"
+  else
+    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+      2>&1 | sed "s/^/$slave: /" &
+  fi
+  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
+    sleep $SPARK_SLAVE_SLEEP
+  fi
done

wait