From 144af84f727cb11befc11723152e65e5e84cbb16 Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sun, 18 May 2014 16:44:26 -0700
Subject: [PATCH 1/4] Update Windows scripts to match latest binary package layout

Also fixed an issue where SparkSubmit was trying to parse local files
as URLs, which fails on Windows because they contain backslashes. We
didn't need to treat those as URLs to check if a file exists. (A short
illustration of the URL-parsing failure appears in the note at the end
of this series.)
---
 README.md | 7 +--
 bin/compute-classpath.cmd | 17 ++++++-
 bin/run-example | 23 ++++-----
 bin/run-example2.cmd | 51 ++++++++++++++-----
 bin/spark-class2.cmd | 2 +
 .../org/apache/spark/deploy/SparkSubmit.scala | 2 +-
 6 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 9c2e32b90f162..6211a5889a3f5 100644
--- a/README.md
+++ b/README.md
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming guide,
 on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
-
 ## Building Spark
 
 Spark is built on Scala 2.10. To build Spark and its example programs, run:
 
     ./sbt/sbt assembly
 
+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell
 
 The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi
 
-will run the Logistic Regression example locally.
+will run the Pi example locally.
 
 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 065553eb31939..a987bf56908e7 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -31,7 +31,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (
@@ -42,6 +42,21 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%\lib
+) else (
+  set datanucleus_dir=%FWDIR%\lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
diff --git a/bin/run-example b/bin/run-example
index 146951ac0ee56..7caab31daef39 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo "  - set MASTER=XX to use a specific master"
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,17 +47,6 @@ fi
 
 EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
@@ -55,5 +54,5 @@ fi
 ./bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
index 40abb9af74246..5832d1fa5f774 100644
--- a/bin/run-example2.cmd
+++ b/bin/run-example2.cmd
@@ -30,7 +30,9 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [^<args^>]
+  echo Usage: run-example ^<example-class^> [^<example-args^>]
+  echo   - set MASTER=XX to use a specific master
+  echo   - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
   goto exit
 :arg_given
 
@@ -38,8 +40,14 @@ set EXAMPLES_DIR=%FWDIR%examples
 
 rem Figure out the JAR file that our examples were packaged into.
 set SPARK_EXAMPLES_JAR=
-for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
-  set SPARK_EXAMPLES_JAR=%%d
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%lib\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
+) else (
+  for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
 )
 if "x%SPARK_EXAMPLES_JAR%"=="x" (
   echo Failed to find Spark examples assembly JAR.
@@ -47,15 +55,34 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" (
   goto exit
 )
 
-rem Compute Spark classpath using external script
-set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
-set DONT_PRINT_CLASSPATH=0
-set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+rem Set master from MASTER environment variable if given
+if "x%MASTER%"=="x" (
+  set EXAMPLE_MASTER=local[*]
+) else (
+  set EXAMPLE_MASTER=%MASTER%
+)
+
+rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
+set EXAMPLE_CLASS=%1
+set PREFIX=%EXAMPLE_CLASS:~0,25%
+if not %PREFIX%==org.apache.spark.examples (
+  set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
+)
+
+rem Get the tail of the argument list, to skip the first one. This is surprisingly
+rem complicated on Windows.
+set "ARGS="
+:top
+shift
+if "%~1" neq "" (
+  set ARGS=%ARGS% "%~1"
+  goto :top
+)
+if defined ARGS set ARGS=%ARGS:~1%
 
-rem Figure out where java is.
-set RUNNER=java
-if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+call "%FWDIR%bin\spark-submit.cmd" ^
+  --master %EXAMPLE_MASTER% ^
+  --class %EXAMPLE_CLASS% ^
+  "%SPARK_EXAMPLES_JAR%" %ARGS%
 
-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
 :exit
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 4302c1b6b7ff4..266edd9fa9835 100755
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
+setlocal enabledelayedexpansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index a99b2176e2b5e..c54331c00fab8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -299,7 +299,7 @@ object SparkSubmit {
   }
 
   private def addJarToClasspath(localJar: String, loader: ExecutorURLClassLoader) {
-    val localJarFile = new File(new URI(localJar).getPath)
+    val localJarFile = new File(localJar)
     if (!localJarFile.exists()) {
       printWarning(s"Jar $localJar does not exist, skipping.")
     }

From d3b71c7611e5d52519fecb4e9268b0ec362ad65d Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sun, 18 May 2014 17:16:40 -0700
Subject: [PATCH 2/4] Properly exclude datanucleus files in Maven assembly

They are excluded in SBT, but the rule added in Maven didn't actually
remove the files from the JAR. The JARs built still worked despite this,
but it's better to remove them than have 2 copies on the classpath.
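
The shade plugin's filter excludes match file paths inside the jar, not
Maven coordinates, which is why org/datanucleus/** takes effect where
org.datanucleus:* silently matched nothing. Not part of the patch, but
as a sanity check, a minimal Scala sketch along these lines lists any
datanucleus entries left in an assembly (the default jar path below is
hypothetical; pass the real one as an argument):

    import java.util.jar.JarFile
    import scala.collection.JavaConverters._

    object CheckAssembly {
      def main(args: Array[String]): Unit = {
        // Hypothetical default; pass the real assembly jar path as the first argument.
        val jarPath = args.headOption.getOrElse("assembly/target/spark-assembly.jar")
        val jar = new JarFile(jarPath)
        // Shade filter patterns apply to entry names like "org/datanucleus/...",
        // so scan the entry list for that prefix.
        val leaked = jar.entries().asScala.map(_.getName)
          .filter(_.startsWith("org/datanucleus/")).toList
        jar.close()
        if (leaked.isEmpty) println("OK: no datanucleus files in the assembly")
        else leaked.foreach(e => println("still present: " + e))
      }
    }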
---
 assembly/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assembly/pom.xml b/assembly/pom.xml
index abd8935339992..963357b9ab167 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -96,7 +96,7 @@
               <artifact>*:*</artifact>
               <excludes>
-                <exclude>org.datanucleus:*</exclude>
+                <exclude>org/datanucleus/**</exclude>
                 <exclude>META-INF/*.SF</exclude>
                 <exclude>META-INF/*.DSA</exclude>
                 <exclude>META-INF/*.RSA</exclude>
               </excludes>

From 228577bc9dd3c884c2dc40dc9f0405790467dbcd Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Mon, 19 May 2014 00:52:26 -0700
Subject: [PATCH 3/4] Review comments

---
 bin/compute-classpath.cmd | 11 +++++++++--
 bin/run-example2.cmd | 2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index a987bf56908e7..45a58f24bfb87 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.
 
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !classpath! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed
@@ -47,9 +54,9 @@ rem Datanucleus jars do not work if only included in the uber jar as plugin.xml
 rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
 rem built with Hive, so look for them there.
 if exist "%FWDIR%RELEASE" (
-  set datanucleus_dir=%FWDIR%\lib
+  set datanucleus_dir=%FWDIR%lib
 ) else (
-  set datanucleus_dir=%FWDIR%\lib_managed\jars
+  set datanucleus_dir=%FWDIR%lib_managed\jars
 )
 set "datanucleus_jars="
 for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
index 5832d1fa5f774..eadedd7fa61ff 100644
--- a/bin/run-example2.cmd
+++ b/bin/run-example2.cmd
@@ -30,7 +30,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [^<example-args^>]
+  echo Usage: run-example ^<example-class^> [example-args]
   echo   - set MASTER=XX to use a specific master
   echo   - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
   goto exit

From d558f960d8b7fd1d92e0ff8d34b6d6b60f52e94f Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Mon, 19 May 2014 01:03:09 -0700
Subject: [PATCH 4/4] Fix comment

---
 bin/compute-classpath.cmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 45a58f24bfb87..58710cd1bd548 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -22,7 +22,7 @@ rem script and the ExecutorRunner in standalone cluster mode.
 
 rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
 rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
-rem need to set it here because we use !classpath! below.
+rem need to set it here because we use !datanucleus_jars! below.
 if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
 setlocal enabledelayedexpansion
 :skip_delayed_expansion
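
A note on the SparkSubmit change in PATCH 1/4: java.net.URI rejects the
backslashes in a native Windows path outright, so the old code threw a
URISyntaxException before the existence check ever ran, while java.io.File
accepts native paths on any platform. A minimal sketch of the difference
(the jar path is hypothetical):

    import java.io.File
    import java.net.{URI, URISyntaxException}

    object WindowsJarPath {
      def main(args: Array[String]): Unit = {
        // Hypothetical local jar path as it would appear on Windows.
        val localJar = "C:\\spark\\lib\\spark-examples.jar"
        // Old behavior: backslashes are illegal in URIs, so parsing fails
        // before we can even ask whether the file exists.
        try {
          new File(new URI(localJar).getPath)
        } catch {
          case e: URISyntaxException => println("URI parse failed: " + e.getMessage)
        }
        // New behavior: File takes the native path directly.
        println("exists? " + new File(localJar).exists())
      }
    }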