Skip to content

Commit

Permalink
Merge branch 'master' into yarn-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Marcelo Vanzin committed Sep 8, 2014
2 parents add8416 + 7db5339 commit d07ef9a
Show file tree
Hide file tree
Showing 213 changed files with 5,575 additions and 3,762 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ Spark is a fast and general cluster computing system for Big Data. It provides
high-level APIs in Scala, Java, and Python, and an optimized engine that
supports general computation graphs for data analysis. It also supports a
rich set of higher-level tools including Spark SQL for SQL and structured
data processing, MLLib for machine learning, GraphX for graph processing,
and Spark Streaming.
data processing, MLlib for machine learning, GraphX for graph processing,
and Spark Streaming for stream processing.

<http://spark.apache.org/>

Expand Down
2 changes: 1 addition & 1 deletion assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion bagel/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion bin/beeline
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
set -o posix

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

CLASS="org.apache.hive.beeline.BeeLine"
exec "$FWDIR/bin/spark-class" $CLASS "$@"
12 changes: 6 additions & 6 deletions bin/compute-classpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
SCALA_VERSION=2.10

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

# Build up classpath
CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
Expand Down Expand Up @@ -63,7 +63,7 @@ else
assembly_folder="$ASSEMBLY_DIR"
fi

num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)"
if [ "$num_jars" -eq "0" ]; then
echo "Failed to find Spark assembly in $assembly_folder"
echo "You need to build Spark before running this program."
Expand All @@ -77,7 +77,7 @@ if [ "$num_jars" -gt "1" ]; then
exit 1
fi

ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"

# Verify that versions of java used to build the jars and run Spark are compatible
jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
Expand All @@ -103,8 +103,8 @@ else
datanucleus_dir="$FWDIR"/lib_managed/jars
fi

datanucleus_jars=$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")
datanucleus_jars=$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)
datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"

if [ -n "$datanucleus_jars" ]; then
hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
Expand Down
4 changes: 2 additions & 2 deletions bin/load-spark-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
export SPARK_ENV_LOADED=1

# Returns the parent of the directory this script lives in.
parent_dir="$(cd `dirname $0`/..; pwd)"
parent_dir="$(cd "`dirname "$0"`"/..; pwd)"

user_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"

if [ -f "${user_conf_dir}/spark-env.sh" ]; then
# Promote all variable declarations to environment (exported) variables
Expand Down
22 changes: 12 additions & 10 deletions bin/pyspark
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
#

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

source $FWDIR/bin/utils.sh
source "$FWDIR/bin/utils.sh"

SCALA_VERSION=2.10

function usage() {
echo "Usage: ./bin/pyspark [options]" 1>&2
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

Expand All @@ -48,7 +48,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
fi
fi

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

# Figure out which Python executable to use
if [[ -z "$PYSPARK_PYTHON" ]]; then
Expand All @@ -57,12 +57,12 @@ fi
export PYSPARK_PYTHON

# Add the PySpark classes to the Python path:
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"

# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"

# If IPython options are specified, assume user wants to run IPython
if [[ -n "$IPYTHON_OPTS" ]]; then
Expand All @@ -85,6 +85,8 @@ export PYSPARK_SUBMIT_ARGS

# For pyspark tests
if [[ -n "$SPARK_TESTING" ]]; then
unset YARN_CONF_DIR
unset HADOOP_CONF_DIR
if [[ -n "$PYSPARK_DOC_TEST" ]]; then
exec "$PYSPARK_PYTHON" -m doctest $1
else
Expand All @@ -97,10 +99,10 @@ fi
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
primary=$1
primary="$1"
shift
gatherSparkSubmitOpts "$@"
exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
else
# PySpark shell requires special handling downstream
export PYSPARK_SHELL=1
Expand Down
8 changes: 4 additions & 4 deletions bin/run-example
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

SCALA_VERSION=2.10

FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
export SPARK_HOME="$FWDIR"
EXAMPLES_DIR="$FWDIR"/examples

Expand All @@ -35,12 +35,12 @@ else
fi

if [ -f "$FWDIR/RELEASE" ]; then
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`"
fi

if [[ -z $SPARK_EXAMPLES_JAR ]]; then
if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
echo "You need to build Spark before running this program" 1>&2
exit 1
Expand Down
20 changes: 10 additions & 10 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ esac
SCALA_VERSION=2.10

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

. $FWDIR/bin/load-spark-env.sh
. "$FWDIR"/bin/load-spark-env.sh

if [ -z "$1" ]; then
echo "Usage: spark-class <class> [<args>]" 1>&2
Expand Down Expand Up @@ -105,7 +105,7 @@ else
exit 1
fi
fi
JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')

# Set JAVA_OPTS to be able to load native libraries and to set heap size
if [ "$JAVA_VERSION" -ge 18 ]; then
Expand All @@ -117,7 +117,7 @@ JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`"
fi

# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
Expand All @@ -126,21 +126,21 @@ TOOLS_DIR="$FWDIR"/tools
SPARK_TOOLS_JAR=""
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the SBT build
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`"
fi
if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
# Use the JAR from the Maven build
# TODO: this also needs to become an assembly!
export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`"
fi

# Compute classpath using external script
classpath_output=$($FWDIR/bin/compute-classpath.sh)
classpath_output=$("$FWDIR"/bin/compute-classpath.sh)
if [[ "$?" != "0" ]]; then
echo "$classpath_output"
exit 1
else
CLASSPATH=$classpath_output
CLASSPATH="$classpath_output"
fi

if [[ "$1" =~ org.apache.spark.tools.* ]]; then
Expand All @@ -153,9 +153,9 @@ if [[ "$1" =~ org.apache.spark.tools.* ]]; then
fi

if $cygwin; then
CLASSPATH=`cygpath -wp $CLASSPATH`
CLASSPATH="`cygpath -wp "$CLASSPATH"`"
if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
export SPARK_TOOLS_JAR="`cygpath -w "$SPARK_TOOLS_JAR"`"
fi
fi
export CLASSPATH
Expand Down
10 changes: 5 additions & 5 deletions bin/spark-shell
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ esac
set -o posix

## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

function usage() {
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
fi

source $FWDIR/bin/utils.sh
source "$FWDIR"/bin/utils.sh
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

Expand All @@ -54,11 +54,11 @@ function main() {
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
}

Expand Down
8 changes: 4 additions & 4 deletions bin/spark-sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
CLASS_NOT_FOUND_EXIT_STATUS=1

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

function usage {
echo "Usage: ./bin/spark-sql [options] [cli option]"
Expand All @@ -38,18 +38,18 @@ function usage {
pattern+="\|--help"
pattern+="\|======="

$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
"$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
echo
echo "CLI options:"
$FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
"$FWDIR"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
exit 0
fi

source $FWDIR/bin/utils.sh
source "$FWDIR"/bin/utils.sh
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

Expand Down
4 changes: 2 additions & 2 deletions bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

# NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!

export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
ORIG_ARGS=("$@")

while (($#)); do
Expand Down Expand Up @@ -59,5 +59,5 @@ if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FI
fi
fi

exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"

2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
5 changes: 2 additions & 3 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SparkD
import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend}
import org.apache.spark.scheduler.local.LocalBackend
import org.apache.spark.storage._
import org.apache.spark.SPARK_VERSION
import org.apache.spark.ui.SparkUI
import org.apache.spark.util.{CallSite, ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedWeakValueHashMap, Utils}

Expand Down Expand Up @@ -825,7 +826,7 @@ class SparkContext(config: SparkConf) extends Logging {
}

/** The version of Spark on which this application is running. */
def version = SparkContext.SPARK_VERSION
def version = SPARK_VERSION

/**
* Return a map from the slave to the max memory available for caching and the remaining
Expand Down Expand Up @@ -1297,8 +1298,6 @@ class SparkContext(config: SparkConf) extends Logging {
*/
object SparkContext extends Logging {

private[spark] val SPARK_VERSION = "1.0.0"

private[spark] val SPARK_JOB_DESCRIPTION = "spark.job.description"

private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id"
Expand Down
Loading

0 comments on commit d07ef9a

Please sign in to comment.