Commit c5634de

updated documentation to overview build/mvn, updated all points where sbt/sbt was referenced with build/sbt

Brennon York committed Dec 23, 2014
1 parent b8437ba commit c5634de
Showing 11 changed files with 57 additions and 47 deletions.
10 changes: 5 additions & 5 deletions dev/create-release/create-release.sh
@@ -87,8 +87,8 @@ if [[ ! "$@" =~ --package-only ]]; then
git commit -a -m "Preparing development version $next_ver"
git push origin $GIT_TAG
git push origin HEAD:$GIT_BRANCH
git checkout -f $GIT_TAG

# Using Nexus API documented here:
# https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
echo "Creating Nexus staging repository"
@@ -106,7 +106,7 @@ if [[ ! "$@" =~ --package-only ]]; then
clean install

./dev/change-version-to-2.11.sh

mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
@@ -174,7 +174,7 @@ make_binary_release() {
NAME=$1
FLAGS=$2
cp -r spark spark-$RELEASE_VERSION-bin-$NAME

cd spark-$RELEASE_VERSION-bin-$NAME

# TODO There should probably be a flag to make-distribution to allow 2.11 support
@@ -219,7 +219,7 @@ scp spark-* \

# Docs
cd spark
-sbt/sbt clean
+build/sbt clean
cd docs
# Compile docs with Java 7 to use nicer format
JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build
8 changes: 4 additions & 4 deletions dev/mima
@@ -24,21 +24,21 @@ set -e
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"

-echo -e "q\n" | sbt/sbt oldDeps/update
+echo -e "q\n" | build/sbt oldDeps/update
rm -f .generated-mima*

# Generate Mima Ignore is called twice, first with latest built jars
# on the classpath and then again with previous version jars on the classpath.
# Because of a bug in GenerateMIMAIgnore, when old jars are ahead on the classpath
# it did not process the new classes (which are in the assembly jar).
./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

export SPARK_CLASSPATH="`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`"
echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"

./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

-echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
+echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
ret_val=$?

if [ $ret_val != 0 ]; then
24 changes: 12 additions & 12 deletions dev/run-tests
@@ -59,17 +59,17 @@ export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl"
{
if test -x "$JAVA_HOME/bin/java"; then
declare java_cmd="$JAVA_HOME/bin/java"
else
declare java_cmd=java
fi

# We can't use sed -r -e due to OS X / BSD compatibility; hence, all the parentheses.
JAVA_VERSION=$(
$java_cmd -version 2>&1 \
| grep -e "^java version" --max-count=1 \
| sed "s/java version \"\(.*\)\.\(.*\)\.\(.*\)\"/\1\2/"
)
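# e.g. `java version "1.8.0_25"` is reduced by the sed above to "18", so the
# numeric comparison below can check it against 18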

if [ "$JAVA_VERSION" -lt 18 ]; then
echo "[warn] Java 8 tests will not run because JDK version is < 1.8."
fi
@@ -79,7 +79,7 @@ export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl"
# Partial solution for SPARK-1455.
if [ -n "$AMPLAB_JENKINS" ]; then
git fetch origin master:master

sql_diffs=$(
git diff --name-only master \
| grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh"
@@ -93,7 +93,7 @@ if [ -n "$AMPLAB_JENKINS" ]; then
if [ -n "$sql_diffs" ]; then
echo "[info] Detected changes in SQL. Will run Hive test suite."
_RUN_SQL_TESTS=true

if [ -z "$non_sql_diffs" ]; then
echo "[info] Detected no changes except in SQL. Will only run SQL tests."
_SQL_TESTS_ONLY=true
@@ -151,14 +151,14 @@ CURRENT_BLOCK=$BLOCK_BUILD
HIVE_12_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver -Phive-0.12.0"
echo "[info] Compile with hive 0.12"
echo -e "q\n" \
-| sbt/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
+| build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"

# Then build with the default version (0.13.1), because the tests are based on this version
echo "[info] Building Spark with these arguments: $SBT_MAVEN_PROFILES_ARGS"\
" -Phive -Phive-thriftserver"
echo -e "q\n" \
-| sbt/sbt $SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver package assembly/assembly \
+| build/sbt $SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver package assembly/assembly \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

@@ -175,27 +175,27 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver"
fi

if [ -n "$_SQL_TESTS_ONLY" ]; then
# This must be an array of individual arguments. Otherwise, having one long string
#+ will be interpreted as a single test, which doesn't work.
SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test" "mllib/test")
else
SBT_MAVEN_TEST_ARGS=("test")
fi

echo "[info] Running Spark tests with these arguments: $SBT_MAVEN_PROFILES_ARGS ${SBT_MAVEN_TEST_ARGS[@]}"

# NOTE: echo "q" is needed because sbt, on encountering a build file with a failure
#+ (either resolution or compilation), prompts the user for input (q, r, etc.)
#+ to quit or retry. This echo is there so that the build does not block.
# NOTE: Do not quote $SBT_MAVEN_PROFILES_ARGS or else it will be interpreted as a
#+ single argument!
#+ "${SBT_MAVEN_TEST_ARGS[@]}" is cool because it's an array.
# QUESTION: Why doesn't 'yes "q"' work?
# QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work?
echo -e "q\n" \
-| sbt/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
+| build/sbt $SBT_MAVEN_PROFILES_ARGS "${SBT_MAVEN_TEST_ARGS[@]}" \
| grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
}

6 changes: 3 additions & 3 deletions dev/scalastyle
@@ -17,12 +17,12 @@
# limitations under the License.
#

-echo -e "q\n" | sbt/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
+echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
# Check style with YARN alpha built too
-echo -e "q\n" | sbt/sbt -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.9 yarn-alpha/scalastyle \
+echo -e "q\n" | build/sbt -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.9 yarn-alpha/scalastyle \
>> scalastyle.txt
# Check style with YARN built too
-echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 yarn/scalastyle \
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 yarn/scalastyle \
>> scalastyle.txt

ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}')
6 changes: 3 additions & 3 deletions docs/README.md
@@ -21,7 +21,7 @@ read those text files directly if you want. Start with index.md.

The markdown code can be compiled to HTML using the [Jekyll tool](http://jekyllrb.com).
`Jekyll` and a few dependencies must be installed for this to work. We recommend
installing via the Ruby Gem dependency manager. Since the exact HTML output
varies between versions of Jekyll and its dependencies, we list specific versions here
in some cases:

@@ -60,15 +60,15 @@ We use Sphinx to generate Python API docs, so you will need to install it by running

## API Docs (Scaladoc and Sphinx)

-You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory.
+You can build just the Spark scaladoc by running `build/sbt doc` from the SPARK_PROJECT_ROOT directory.

Similarly, you can build just the PySpark docs by running `make html` from the
SPARK_PROJECT_ROOT/python/docs directory. Documentation is only generated for classes that are listed as
public in `__init__.py`.

When you run `jekyll` in the `docs` directory, it will also copy over the scaladoc for the various
Spark subprojects into the `docs` directory (and then also into the `_site` directory). We use a
-jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it
+jekyll plugin to run `build/sbt doc` before building the site, so if you haven't run it (recently) it
may take some time as it generates all of the scaladoc. The jekyll plugin also generates the
PySpark docs using [Sphinx](http://sphinx-doc.org/).
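
Put together, a minimal sketch of building both sets of API docs by hand, using only the commands described above:

```bash
# From the Spark project root: build the Scala API docs (scaladoc)
build/sbt doc

# Build the PySpark API docs (requires Sphinx)
cd python/docs
make html
```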

4 changes: 2 additions & 2 deletions docs/_plugins/copy_api_dirs.rb
@@ -25,8 +25,8 @@
curr_dir = pwd
cd("..")

puts "Running 'sbt/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..."
puts `sbt/sbt -Pkinesis-asl compile unidoc`
puts "Running 'build/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..."
puts `build/sbt -Pkinesis-asl compile unidoc`

puts "Moving back into docs dir."
cd("docs")
24 changes: 17 additions & 7 deletions docs/building-spark.md
@@ -11,6 +11,14 @@ Building Spark using Maven requires Maven 3.0.4 or newer and Java 6+.

# Building with `build/mvn`

Spark now ships with a self-contained Maven installation, located under the `build/` directory, to ease building and deploying Spark from source. The script automatically downloads and sets up all necessary build requirements ([Maven](https://maven.apache.org/), [Scala](http://www.scala-lang.org/), and [Zinc](https://github.com/typesafehub/zinc)) locally within the `build/` directory itself. It honors any `mvn` binary already present, but will pull down its own copies of Scala and Zinc regardless, to ensure the proper version requirements are met. `build/mvn` acts as a pass-through to the `mvn` call, allowing an easy transition from previous build methods. As an example, one can build a version of Spark as follows:

{% highlight bash %}
build/mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
{% endhighlight %}

Other build examples can be found below.

# Setting up Maven's Memory Usage

You'll need to configure Maven to use more memory than usual by setting `MAVEN_OPTS`. We recommend the following settings:
@@ -29,7 +37,9 @@ If you don't run this, you may see errors like the following:

You can fix this by setting the `MAVEN_OPTS` variable as discussed before.

-**Note:** *For Java 8 and above this step is not required.*
+**Note:**
+* *For Java 8 and above, this step is not required.*
+* *If `build/mvn` is used and `MAVEN_OPTS` was not already set, the script will set it for you.*
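
For reference, a minimal sketch of setting the variable by hand; the flag values shown here are illustrative assumptions, so substitute the settings recommended above:

{% highlight bash %}
# Illustrative values only; use the settings recommended in this guide.
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
{% endhighlight %}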

# Specifying the Hadoop Version

@@ -190,22 +200,22 @@ compilation. More advanced developers may wish to use SBT.
The SBT build is derived from the Maven POM files, and so the same Maven profiles and variables
can be set to control the SBT build. For example:

-sbt/sbt -Pyarn -Phadoop-2.3 assembly
+build/sbt -Pyarn -Phadoop-2.3 assembly

# Testing with SBT

-Some of the tests require Spark to be packaged first, so always run `sbt/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:
+Some of the tests require Spark to be packaged first, so always run `build/sbt assembly` the first time. The following is an example of a correct (build, test) sequence:

-sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver assembly
-sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test
+build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver assembly
+build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver test

To run only a specific test suite:

-sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver "test-only org.apache.spark.repl.ReplSuite"
+build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver "test-only org.apache.spark.repl.ReplSuite"

To run the test suites of a specific sub-project:

-sbt/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver core/test
+build/sbt -Pyarn -Phadoop-2.3 -Phive -Phive-thriftserver core/test

# Speeding up Compilation with Zinc

10 changes: 5 additions & 5 deletions docs/hadoop-third-party-distributions.md
@@ -18,7 +18,7 @@ see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version).

The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that
some Hadoop releases are binary compatible across client versions. This means the pre-built Spark
distribution may "just work" without you needing to compile. That said, we recommend compiling with
the _exact_ Hadoop version you are running to avoid any compatibility errors.

<table>
@@ -50,7 +50,7 @@ the _exact_ Hadoop version you are running to avoid any compatibility errors.

In SBT, the equivalent can be achieved by setting the `hadoop.version` property:

-sbt/sbt -Dhadoop.version=1.0.4 assembly
+build/sbt -Dhadoop.version=1.0.4 assembly

# Linking Applications to the Hadoop Version

@@ -98,11 +98,11 @@ Spark can run in a variety of deployment modes:

* Using a dedicated set of Spark nodes in your cluster. These nodes should be co-located with your
Hadoop installation.
* Running on the same nodes as an existing Hadoop installation, with a fixed amount of memory and
cores dedicated to Spark on each node.
* Running Spark alongside Hadoop using a cluster resource manager, such as YARN or Mesos.

These options are identical for those using CDH and HDP.

# Inheriting Cluster Configuration

@@ -116,5 +116,5 @@ The location of these configuration files varies across CDH and HDP versions, but
a common location is inside of `/etc/hadoop/conf`. Some tools, such as Cloudera Manager, create
configurations on-the-fly, but offer a mechanism to download copies of them.

To make these files visible to Spark, set `HADOOP_CONF_DIR` in `$SPARK_HOME/spark-env.sh`
to a location containing the configuration files.
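
For example, a minimal sketch of that setting, assuming the common `/etc/hadoop/conf` location mentioned above (adjust the path for your distribution):

    # In $SPARK_HOME/spark-env.sh: point Spark at the directory that holds
    # core-site.xml, hdfs-site.xml, and the other Hadoop client configs.
    export HADOOP_CONF_DIR=/etc/hadoop/conf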
6 changes: 3 additions & 3 deletions extras/java8-tests/README.md
@@ -8,7 +8,7 @@ to your Java location. The set-up depends a bit on the build system:
`-java-home` to the sbt launch script. If a Java 8 JDK is detected, sbt will automatically
include the Java 8 test project.

-`$ JAVA_HOME=/opt/jdk1.8.0/ sbt/sbt clean "test-only org.apache.spark.Java8APISuite"`
+`$ JAVA_HOME=/opt/jdk1.8.0/ build/sbt clean "test-only org.apache.spark.Java8APISuite"`

* For Maven users,

@@ -19,6 +19,6 @@ to your Java location. The set-up depends a bit on the build system:
`$ JAVA_HOME=/opt/jdk1.8.0/ mvn clean install -DskipTests`
`$ JAVA_HOME=/opt/jdk1.8.0/ mvn test -Pjava8-tests -DwildcardSuites=org.apache.spark.Java8APISuite`

Note that the above command can only be run from the project root directory, since this module
depends on core and the test-jars of core and streaming. This means an install step is
required to make the test dependencies visible to the Java 8 sub-project.
2 changes: 1 addition & 1 deletion python/pyspark/sql.py
@@ -1668,7 +1668,7 @@ def _ssql_ctx(self):
except Py4JError as e:
raise Exception("You must build Spark with Hive. "
"Export 'SPARK_HIVE=true' and run "
"sbt/sbt assembly", e)
"build/sbt assembly", e)

def _get_hive_ctx(self):
return self._jvm.HiveContext(self._jsc.sc())
4 changes: 2 additions & 2 deletions sql/README.md
@@ -22,10 +22,10 @@ export HADOOP_HOME="<path to>/hadoop-1.0.4"

Using the console
=================
-An interactive scala console can be invoked by running `sbt/sbt hive/console`. From here you can execute queries and inspect the various stages of query optimization.
+An interactive scala console can be invoked by running `build/sbt hive/console`. From here you can execute queries and inspect the various stages of query optimization.

```scala
-catalyst$ sbt/sbt hive/console
+catalyst$ build/sbt hive/console

[info] Starting scala interpreter...
import org.apache.spark.sql.catalyst.analysis._
