From cb050a0a4708efe19b219ffea50d8889ee11c750 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 5 Apr 2016 22:48:32 -0700
Subject: [PATCH 1/2] Update the build docs to switch from assembly to package
 and add a note in spark-class if we can't find the required target

---
 bin/spark-class        |  1 +
 docs/building-spark.md | 13 +++----------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/bin/spark-class b/bin/spark-class
index b489591778cb..fd90217490e2 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -45,6 +45,7 @@ fi
 if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
   echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
   echo "You need to build Spark before running this program." 1>&2
+  echo "Note: In Spark 2.0 the required build target has changed from \"assembly\" to \"package\"" 1>&2
   exit 1
 else
   LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 13aa80496eae..40661604af94 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -190,13 +190,6 @@ or
 Java 8 tests are automatically enabled when a Java 8 JDK is detected.
 If you have JDK 8 installed but it is not the system default, you can set JAVA_HOME to point to JDK 8 before running the tests.
 
-# Building for PySpark on YARN
-
-PySpark on YARN is only supported if the jar is built with Maven. Further, there is a known problem
-with building this assembly jar on Red Hat based operating systems (see [SPARK-1753](https://issues.apache.org/jira/browse/SPARK-1753)). If you wish to
-run PySpark on a YARN cluster with Red Hat installed, we recommend that you build the jar elsewhere,
-then ship it over to the cluster. We are investigating the exact cause for this.
-
 # Packaging without Hadoop Dependencies for YARN
 
 The assembly jar produced by `mvn package` will, by default, include all of Spark's dependencies, including Hadoop and some of its ecosystem projects. On YARN deployments, this causes multiple versions of these to appear on executor classpaths: the version packaged in the Spark assembly and the version on each node, included with `yarn.application.classpath`. The `hadoop-provided` profile builds the assembly without including Hadoop-ecosystem projects, like ZooKeeper and Hadoop itself.
@@ -210,7 +203,7 @@ compilation. More advanced developers may wish to use SBT.
 The SBT build is derived from the Maven POM files, and so the same Maven profiles and variables
 can be set to control the SBT build. For example:
 
-    build/sbt -Pyarn -Phadoop-2.3 assembly
+    build/sbt -Pyarn -Phadoop-2.3 package
 
 To avoid the overhead of launching sbt each time you need to re-compile, you can launch sbt
 in interactive mode by running `build/sbt`, and then run all build commands at the command
@@ -219,9 +212,9 @@ prompt. For more recommendations on reducing build time, refer to the
 
 # Testing with SBT
 
-Some of the tests require Spark to be packaged first, so always run `build/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:
+Some of the tests require Spark to be packaged first, so always run `build/sbt package` the first time. The following is an example of a correct (build, test) sequence:
 
-    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver assembly
+    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver package
     build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test
 
 To run only a specific test suite as follows:

From 7ace69c88448b5220499078ad9cded89ddd19ae8 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Wed, 6 Apr 2016 11:01:08 -0700
Subject: [PATCH 2/2] Simplify message

---
 bin/spark-class | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bin/spark-class b/bin/spark-class
index fd90217490e2..b2a36b984678 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -44,8 +44,7 @@ fi
 
 if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
   echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
-  echo "You need to build Spark before running this program." 1>&2
-  echo "Note: In Spark 2.0 the required build target has changed from \"assembly\" to \"package\"" 1>&2
+  echo "You need to build Spark with the target \"package\" before running this program." 1>&2
   exit 1
 else
   LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
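For context, a minimal sketch of the Spark 2.0 workflow these patches document (the profile flags are the examples used in building-spark.md; adjust them to your environment):

    # Build the Spark jars with the "package" target; in Spark 2.0 this
    # replaces the old "assembly" target.
    build/sbt -Pyarn -Phadoop-2.3 package

    # Package before testing, then run the SBT tests.
    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver package
    build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test

    # If the jars directory is missing, bin/spark-class now exits with
    # (approximate output):
    #   Failed to find Spark jars directory (...)
    #   You need to build Spark with the target "package" before running this program.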