Merge branch 'master' into SPARK-2889
Conflicts:
	core/src/main/scala/org/apache/spark/util/FileLogger.scala
	yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
Marcelo Vanzin committed Aug 20, 2014
2 parents 3f26760 + b3ec51b commit 0ac3fdf
Showing 378 changed files with 11,217 additions and 4,691 deletions.
1 change: 1 addition & 0 deletions .rat-excludes
@@ -25,6 +25,7 @@ log4j-defaults.properties
bootstrap-tooltip.js
jquery-1.11.1.min.js
sorttable.js
.*avsc
.*txt
.*json
.*data
32 changes: 0 additions & 32 deletions .travis.yml

This file was deleted.

5 changes: 5 additions & 0 deletions README.md
@@ -115,6 +115,11 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
</dependency>


## A Note About Thrift JDBC server and CLI for Spark SQL

Spark SQL supports a Thrift JDBC server and a CLI.
See sql-programming-guide.md for more information about using the JDBC server.

## Configuration

Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)
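For readers trying out the JDBC server mentioned in the new README note, a minimal local run looks roughly like the following, assuming the `sbin/start-thriftserver.sh` and `bin/beeline` launchers bundled with this release (the port, options, and query are illustrative):

```bash
# Start the Thrift JDBC server locally (assumed launcher; it accepts
# spark-submit style options in this release).
./sbin/start-thriftserver.sh --master local[2]

# Connect with the bundled Beeline client on the default HiveServer2 port
# and run a query.
./bin/beeline -u jdbc:hive2://localhost:10000 -e "SHOW TABLES;"
```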
5 changes: 0 additions & 5 deletions assembly/pom.xml
@@ -163,11 +163,6 @@
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>hive-thriftserver</id>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
18 changes: 14 additions & 4 deletions bin/pyspark
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

source $FWDIR/bin/utils.sh

SCALA_VERSION=2.10

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
function usage() {
echo "Usage: ./bin/pyspark [options]" 1>&2
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
usage
fi

# Exit if the user hasn't compiled Spark
@@ -66,10 +72,11 @@ fi
# Build up arguments list manually to preserve quotes and backslashes.
# We export Spark submit arguments as an environment variable because shell.py must run as a
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.

SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"
PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
for i in "$@"; do
for i in "${SUBMISSION_OPTS[@]}"; do
if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
exec $FWDIR/bin/spark-submit "$@"
primary=$1
shift
gatherSparkSubmitOpts "$@"
exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
else
# Only use ipython if no command line arguments were provided [SPARK-1134]
if [[ "$IPYTHON" = "1" ]]; then
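The pyspark changes above lean on `gatherSparkSubmitOpts` from the newly sourced `bin/utils.sh`, which splits the command line into `SUBMISSION_OPTS` (flags meant for spark-submit) and `APPLICATION_OPTS` (everything handed to the application). A simplified sketch of that splitting, with an illustrative rather than exhaustive option list:

```bash
# Simplified sketch of gatherSparkSubmitOpts (assumed behavior of bin/utils.sh).
function gatherSparkSubmitOpts() {
  SUBMISSION_OPTS=()
  APPLICATION_OPTS=()
  while (($#)); do
    case "$1" in
      --master | --deploy-mode | --class | --name | --jars | --py-files | \
      --conf | --properties-file | --driver-memory | --driver-java-options | \
      --driver-library-path | --driver-class-path | --executor-memory | \
      --queue | --num-executors | --executor-cores | --total-executor-cores)
        # spark-submit options that take a value: keep the flag and its value together.
        if [[ $# -lt 2 ]]; then
          "$SUBMIT_USAGE_FUNCTION"   # caller-provided usage function, e.g. usage()
          exit 1
        fi
        SUBMISSION_OPTS+=("$1"); shift
        SUBMISSION_OPTS+=("$1"); shift
        ;;
      --verbose | -v | --supervise)
        SUBMISSION_OPTS+=("$1"); shift
        ;;
      *)
        # Anything else is treated as an application argument.
        APPLICATION_OPTS+=("$1"); shift
        ;;
    esac
  done
  export SUBMISSION_OPTS
  export APPLICATION_OPTS
}
```

Keeping the two groups in Bash arrays and later expanding them with `"${SUBMISSION_OPTS[@]}"` preserves quoting, which is why the loops above now iterate over the arrays instead of `"$@"`.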
49 changes: 38 additions & 11 deletions bin/spark-class
@@ -17,6 +17,8 @@
# limitations under the License.
#

# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!

cygwin=false
case "`uname`" in
CYGWIN*) cygwin=true;;
@@ -39,7 +41,7 @@ fi

if [ -n "$SPARK_MEM" ]; then
echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
fi

# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -73,11 +75,17 @@ case "$1" in
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
;;

# Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
'org.apache.spark.deploy.SparkSubmit')
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
# Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
# SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
'org.apache.spark.deploy.SparkSubmit')
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
fi
if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
fi
;;

*)
@@ -101,11 +109,12 @@ fi
# Set JAVA_OPTS to be able to load native libraries and to set heap size
JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS

# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

TOOLS_DIR="$FWDIR"/tools
@@ -146,10 +155,28 @@ if $cygwin; then
fi
export CLASSPATH

if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
echo -n "Spark Command: " 1>&2
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
echo -e "========================================\n" 1>&2
# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
# to prepare the launch environment of this driver JVM.

if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
# This is used only if the properties file actually contains these special configs
# Export the environment variables needed by SparkSubmitDriverBootstrapper
export RUNNER
export CLASSPATH
export JAVA_OPTS
export OUR_JAVA_MEM
export SPARK_CLASS=1
shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
else
# Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
echo -n "Spark Command: " 1>&2
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
echo -e "========================================\n" 1>&2
fi
exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
fi

exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
20 changes: 14 additions & 6 deletions bin/spark-shell
@@ -31,13 +31,21 @@ set -o posix
## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"

function usage() {
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
usage
fi

function main(){
source $FWDIR/bin/utils.sh
SUBMIT_USAGE_FUNCTION=usage
gatherSparkSubmitOpts "$@"

function main() {
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
@@ -46,11 +54,11 @@ function main(){
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
}

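With the split above, spark-submit flags are placed before the `spark-shell` primary resource and any remaining arguments after it. An illustrative expansion, assuming all supplied flags are spark-submit options:

```bash
# An invocation such as
./bin/spark-shell --master yarn-client --driver-memory 2g

# now ends up roughly as
./bin/spark-submit --class org.apache.spark.repl.Main \
  --master yarn-client --driver-memory 2g spark-shell
```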
2 changes: 1 addition & 1 deletion bin/spark-shell.cmd
@@ -19,4 +19,4 @@ rem

set SPARK_HOME=%~dp0..

cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell --class org.apache.spark.repl.Main %*
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell
20 changes: 10 additions & 10 deletions bin/spark-sql
@@ -29,7 +29,7 @@ CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
FWDIR="$(cd `dirname $0`/..; pwd)"

function usage {
echo "Usage: ./sbin/spark-sql [options] [cli option]"
echo "Usage: ./bin/spark-sql [options] [cli option]"
pattern="usage"
pattern+="\|Spark assembly has been built with Hive"
pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
@@ -65,30 +65,30 @@ while (($#)); do
case $1 in
-d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
ensure_arg_number $# 2
CLI_ARGS+=($1); shift
CLI_ARGS+=($1); shift
CLI_ARGS+=("$1"); shift
CLI_ARGS+=("$1"); shift
;;

-e)
ensure_arg_number $# 2
CLI_ARGS+=($1); shift
CLI_ARGS+=(\"$1\"); shift
CLI_ARGS+=("$1"); shift
CLI_ARGS+=("$1"); shift
;;

-s | --silent)
CLI_ARGS+=($1); shift
CLI_ARGS+=("$1"); shift
;;

-v | --verbose)
# Both SparkSubmit and SparkSQLCLIDriver recognize -v | --verbose
CLI_ARGS+=($1)
SUBMISSION_ARGS+=($1); shift
CLI_ARGS+=("$1")
SUBMISSION_ARGS+=("$1"); shift
;;

*)
SUBMISSION_ARGS+=($1); shift
SUBMISSION_ARGS+=("$1"); shift
;;
esac
done

eval exec "$FWDIR"/bin/spark-submit --class $CLASS ${SUBMISSION_ARGS[*]} spark-internal ${CLI_ARGS[*]}
exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"
28 changes: 23 additions & 5 deletions bin/spark-submit
@@ -17,14 +17,18 @@
# limitations under the License.
#

# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala!

export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
ORIG_ARGS=("$@")

while (($#)); do
if [ "$1" = "--deploy-mode" ]; then
DEPLOY_MODE=$2
SPARK_SUBMIT_DEPLOY_MODE=$2
elif [ "$1" = "--properties-file" ]; then
SPARK_SUBMIT_PROPERTIES_FILE=$2
elif [ "$1" = "--driver-memory" ]; then
DRIVER_MEMORY=$2
export SPARK_SUBMIT_DRIVER_MEMORY=$2
elif [ "$1" = "--driver-library-path" ]; then
export SPARK_SUBMIT_LIBRARY_PATH=$2
elif [ "$1" = "--driver-class-path" ]; then
@@ -35,10 +39,24 @@ while (($#)); do
shift
done

DEPLOY_MODE=${DEPLOY_MODE:-"client"}
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}

# For client mode, the driver will be launched in the same JVM that launches
# SparkSubmit, so we may need to read the properties file for any extra class
# paths, library paths, java options and memory early on. Otherwise, it will
# be too late by the time the driver JVM has started.

if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then
# Parse the properties file only if the special configs exist
contains_special_configs=$(
grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \
grep -v "^[[:space:]]*#"
)
if [ -n "$contains_special_configs" ]; then
export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
fi
fi

exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
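To make the bootstrap trigger above concrete: in client mode, spark-submit only sets `SPARK_SUBMIT_BOOTSTRAP_DRIVER` when the properties file defines `spark.driver.extra*` or `spark.driver.memory` settings. A hypothetical properties file and invocation (the jar path and example class are illustrative):

```bash
# A scratch properties file containing the "special" driver configs.
cat > /tmp/spark-defaults.conf <<'EOF'
spark.driver.memory            2g
spark.driver.extraClassPath    /opt/libs/extra.jar
spark.driver.extraJavaOptions  -XX:+UseCompressedOops
EOF

# Client mode (the default) plus these configs causes spark-submit to export
# SPARK_SUBMIT_BOOTSTRAP_DRIVER=1, so bin/spark-class launches
# SparkSubmitDriverBootstrapper instead of running SparkSubmit directly.
./bin/spark-submit --properties-file /tmp/spark-defaults.conf \
  --class org.apache.spark.examples.SparkPi \
  lib/spark-examples-assembly.jar 10   # jar path illustrative
```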