From d377cc6f28dd6cae43364f61135ed8abcba3b269 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Wed, 17 Aug 2016 00:08:19 +0900
Subject: [PATCH 01/43] Fix typo in comment in interpreter.sh

---
 bin/download-spark.sh | 1 +
 bin/interpreter.sh    | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 bin/download-spark.sh

diff --git a/bin/download-spark.sh b/bin/download-spark.sh
new file mode 100644
index 00000000000..212c4ba239e
--- /dev/null
+++ b/bin/download-spark.sh
@@ -0,0 +1 @@
+#!/usr/bin/env bash
\ No newline at end of file
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index b1e1a157cb0..38bddb7fae7 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -101,7 +101,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
   if [[ -n "${SPARK_HOME}" ]]; then
     export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit"
     SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)"
-    # This will evantually passes SPARK_APP_JAR to classpath of SparkIMain
+    # This will eventually pass SPARK_APP_JAR to the classpath of SparkIMain
     ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}"

     pattern="$SPARK_HOME/python/lib/py4j-*-src.zip"

From 4f3edfd87e84e65789e0e937b5330c16442fcfbe Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Wed, 17 Aug 2016 10:52:06 +0900
Subject: [PATCH 02/43] Remove spark-dependencies

---
 .travis.yml | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 641b5407861..23899679a0b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,27 +40,27 @@ matrix:

     # Test all modules with spark 2.0.0 and scala 2.11
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
+      env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Pspark-2.0 -Phadoop-2.3 -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""

     # Test all modules with scala 2.10
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pbeam -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
+      env: SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Psparkr -Pscalding -Pbeam -Pexamples -Pscala-2.10" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""

     # Test all modules with scala 2.11
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""
+      env: SCALA_VER="2.11" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" TEST_PROJECTS=""

     # Test spark module for 1.5.2
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" 
TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test spark module for 1.4.1 - jdk: "oraclejdk7" - env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Ppyspark -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test selenium with spark module for 1.6.1 - jdk: "oraclejdk7" - env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false" + env: TEST_SELENIUM="true" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Pexamples" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false" before_install: - "ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin" @@ -95,4 +95,3 @@ after_failure: - cat zeppelin-distribution/target/zeppelin-*-SNAPSHOT/zeppelin-*-SNAPSHOT/logs/zeppelin*.out - cat zeppelin-web/npm-debug.log - cat spark-*/logs/* - From 99ef019521ca1fd0fc41958b20da8642773825d5 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Wed, 17 Aug 2016 16:14:35 +0900 Subject: [PATCH 03/43] Add spark-2.*-bin-hadoop* to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1d37d397676..c84b79bdd75 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ spark/derby.log spark/metastore_db spark-1.*-bin-hadoop* +spark-2.*-bin-hadoop* .spark-dist zeppelin-server/derby.log From 4e8d5ff067c5428a5254e45b4de533c56393f7b4 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 18 Aug 2016 00:22:25 +0900 Subject: [PATCH 04/43] Add download-spark.sh file --- bin/common.sh | 19 ++++++---- bin/download-spark.sh | 78 +++++++++++++++++++++++++++++++++++++++++- bin/zeppelin-daemon.sh | 1 + bin/zeppelin.sh | 2 ++ 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/bin/common.sh b/bin/common.sh index 486d2b1ba43..f005becff0f 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -58,7 +58,7 @@ fi ZEPPELIN_CLASSPATH+=":${ZEPPELIN_CONF_DIR}" -function 
addEachJarInDir(){ +function addEachJarInDir() { if [[ -d "${1}" ]]; then for jar in $(find -L "${1}" -maxdepth 1 -name '*jar'); do ZEPPELIN_CLASSPATH="$jar:$ZEPPELIN_CLASSPATH" @@ -66,7 +66,7 @@ function addEachJarInDir(){ fi } -function addEachJarInDirRecursive(){ +function addEachJarInDirRecursive() { if [[ -d "${1}" ]]; then for jar in $(find -L "${1}" -type f -name '*jar'); do ZEPPELIN_CLASSPATH="$jar:$ZEPPELIN_CLASSPATH" @@ -74,7 +74,7 @@ function addEachJarInDirRecursive(){ fi } -function addEachJarInDirRecursiveForIntp(){ +function addEachJarInDirRecursiveForIntp() { if [[ -d "${1}" ]]; then for jar in $(find -L "${1}" -type f -name '*jar'); do ZEPPELIN_INTP_CLASSPATH="$jar:$ZEPPELIN_INTP_CLASSPATH" @@ -82,7 +82,7 @@ function addEachJarInDirRecursiveForIntp(){ fi } -function addJarInDir(){ +function addJarInDir() { if [[ -d "${1}" ]]; then ZEPPELIN_CLASSPATH="${1}/*:${ZEPPELIN_CLASSPATH}" fi @@ -96,7 +96,7 @@ function addJarInDirForIntp() { ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils -function getZeppelinVersion(){ +function getZeppelinVersion() { if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes" fi @@ -106,7 +106,14 @@ function getZeppelinVersion(){ exit 0 } -# Text encoding for +function downloadSparkBinary() { + if [[ -z "${SPARK_HOME}" ]]; then + #echo "SPARK_HOME is not set. Download Spark binary.." + . "${ZEPPELIN_HOME}/bin/download-spark.sh" + fi +} + +# Text encoding for # read/write job into files, # receiving/displaying query/result. if [[ -z "${ZEPPELIN_ENCODING}" ]]; then diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 212c4ba239e..9bf7326dd61 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -1 +1,77 @@ -#!/usr/bin/env bash \ No newline at end of file +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SPARK_VERSION="2.0.0" +HADOOP_VERSION="2.7" + +FWDIR="$(dirname "${BASH_SOURCE-$0}")" +ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)" +ZEPPELIN_ENV="${ZEPPELIN_HOME}/conf/zeppelin-env.sh" +ZEPPELIN_ENV_TEMP="${ZEPPELIN_HOME}/conf/zeppelin-env.sh.template" + +# Downloads file from the given URL. +# Ties 3 times with 1s delay, 20s read and 15s connection timeouts. +# Arguments: url - source URL +function download_with_retry() { + local url="$1" + wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "${url}" + + if [[ "$?" -ne 0 ]]; then + echo "3 download attempts for ${url} failed" + fi +} + +SPARK_CACHE=".spark-dist" +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" + +mkdir -p "${SPARK_CACHE}" +cd "${SPARK_CACHE}" +if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then + echo "There is no SPARK_HOME in your system." 
+ echo "Download ${SPARK_ARCHIVE} from mirror before starting Zeppelin server..." + MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") + + PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') + PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') + + download_with_retry "${PREFFERED}${PATHINFO}" +fi + +if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then + echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2 + rm -rf "${SPARK_ARCHIVE}" +else + if [[ ! -f "${ZEPPELIN_ENV}" ]]; then + echo "${ZEPPELIN_ENV} doesn't exist." + echo "Creating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." + cp "${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_ENV}" + fi + export SPARK_HOME="${ZEPPELIN_HOME}/.spark-dist/${SPARK_ARCHIVE}" + + echo "SPARK_HOME is ${SPARK_HOME}" + + # get SPARK_HOME line number in conf/zeppelin-env.sh and substitute to real SPARK_HOME + SPARK_HOME_LINE_NUM=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" | cut -d: -f 1) + # save to zeppelin-env.sh.bak temporarily, then remove .bak file + sed -i .bak "${SPARK_HOME_LINE_NUM}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" + rm "${ZEPPELIN_HOME}/conf/zeppelin-env.sh.bak" +fi + +rm -f "${SPARK_HOME}.tgz" + +set +xe diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index 6bdc1d245d3..0bdd56aecff 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -177,6 +177,7 @@ function start() { fi fi + downloadSparkBinary initialize_default_directories echo "ZEPPELIN_CLASSPATH: ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" >> "${ZEPPELIN_OUTFILE}" diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index 92d7f7ead96..1a28c2cde17 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -72,6 +72,8 @@ addJarInDir "${ZEPPELIN_HOME}/zeppelin-web/target/lib" CLASSPATH+=":${ZEPPELIN_CLASSPATH}" +downloadSparkBinary + if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then echo "Log dir doesn't exist, create ${ZEPPELIN_LOG_DIR}" $(mkdir -p "${ZEPPELIN_LOG_DIR}") From 6784015b8da439894dd09bbc3e54477a0f3cba84 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 18 Aug 2016 00:28:51 +0900 Subject: [PATCH 05/43] Remove useless comment line in common.sh --- bin/common.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/common.sh b/bin/common.sh index f005becff0f..babb8d37551 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -108,7 +108,6 @@ function getZeppelinVersion() { function downloadSparkBinary() { if [[ -z "${SPARK_HOME}" ]]; then - #echo "SPARK_HOME is not set. Download Spark binary.." . 
"${ZEPPELIN_HOME}/bin/download-spark.sh" fi } From c866f0b231432b14c092a365d270e81a2222f54a Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 18 Aug 2016 12:32:11 +0900 Subject: [PATCH 06/43] Remove zeppelin-spark-dependencies from r/pom.xml --- r/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/r/pom.xml b/r/pom.xml index 2dc2eef5af7..2d2b6702ced 100644 --- a/r/pom.xml +++ b/r/pom.xml @@ -54,12 +54,6 @@ log4j provided - - ${project.groupId} - zeppelin-spark-dependencies_${scala.binary.version} - ${project.version} - provided - ${project.groupId} zeppelin-interpreter From 3fe19bff1bdbdccba63e3163bd7aabfe23a35777 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Sun, 21 Aug 2016 14:38:55 +0900 Subject: [PATCH 07/43] Change SPARK_HOME with proper message --- bin/download-spark.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 9bf7326dd61..d75f6d17e7d 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -36,14 +36,15 @@ function download_with_retry() { fi } -SPARK_CACHE=".spark-dist" +SPARK_CACHE="interpreter/spark" SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" mkdir -p "${SPARK_CACHE}" cd "${SPARK_CACHE}" if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then echo "There is no SPARK_HOME in your system." - echo "Download ${SPARK_ARCHIVE} from mirror before starting Zeppelin server..." + echo "Zeppelin server will be started after successful downloading ${SPARK_ARCHIVE}" + echo "Download from mirror before starting Zeppelin server..." MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') @@ -61,7 +62,7 @@ else echo "Creating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." 
cp "${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_ENV}" fi - export SPARK_HOME="${ZEPPELIN_HOME}/.spark-dist/${SPARK_ARCHIVE}" + export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}" echo "SPARK_HOME is ${SPARK_HOME}" From 99545233c0e84f48fbf98da25ad131eeba6dd293 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 6 Sep 2016 17:55:20 +0900 Subject: [PATCH 08/43] Check interpreter/spark/ instead of SPARK_HOME --- bin/common.sh | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/bin/common.sh b/bin/common.sh index babb8d37551..6de56492ce3 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -97,17 +97,24 @@ function addJarInDirForIntp() { ZEPPELIN_COMMANDLINE_MAIN=org.apache.zeppelin.utils.CommandLineUtils function getZeppelinVersion() { - if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then - ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes" - fi - addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib" - CLASSPATH+=":${ZEPPELIN_CLASSPATH}" - $ZEPPELIN_RUNNER -cp $CLASSPATH $ZEPPELIN_COMMANDLINE_MAIN -v - exit 0 + if [[ -d "${ZEPPELIN_HOME}/zeppelin-server/target/classes" ]]; then + ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-server/target/classes" + fi + addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib" + CLASSPATH+=":${ZEPPELIN_CLASSPATH}" + $ZEPPELIN_RUNNER -cp $CLASSPATH $ZEPPELIN_COMMANDLINE_MAIN -v + exit 0 } function downloadSparkBinary() { - if [[ -z "${SPARK_HOME}" ]]; then + local SPARK_VERSION + local HADOOP_VERSION + local SPARK_ARCHIVE + SPARK_VERSION="2.0.0" + HADOOP_VERSION="2.7" + SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" + + if [[ ! -d "interpreter/spark/${SPARK_ARCHIVE}" ]]; then . "${ZEPPELIN_HOME}/bin/download-spark.sh" fi } From e6973b3887e9c0d50a1168f26e6f0337f9f78986 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 6 Sep 2016 17:55:40 +0900 Subject: [PATCH 09/43] Refactor download-spark.sh --- bin/download-spark.sh | 108 +++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 34 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index d75f6d17e7d..a3859b1f61c 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -16,63 +16,103 @@ # limitations under the License. # -SPARK_VERSION="2.0.0" -HADOOP_VERSION="2.7" +bin=$(dirname "${BASH_SOURCE-$0}") +bin=$(cd "${bin}">/dev/null; pwd) + +. "${bin}/common.sh" FWDIR="$(dirname "${BASH_SOURCE-$0}")" ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)" -ZEPPELIN_ENV="${ZEPPELIN_HOME}/conf/zeppelin-env.sh" -ZEPPELIN_ENV_TEMP="${ZEPPELIN_HOME}/conf/zeppelin-env.sh.template" +ZEPPELIN_ENV="conf/zeppelin-env.sh" +ZEPPELIN_ENV_TEMP="conf/zeppelin-env.sh.template" +ZEPPELIN_VERSION="$(getZeppelinVersion)" -# Downloads file from the given URL. -# Ties 3 times with 1s delay, 20s read and 15s connection timeouts. +SPARK_VERSION="2.0.0" +HADOOP_VERSION="2.7" + +SPARK_CACHE="interpreter/spark" +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" +ANSWER_FILE="README.txt" + +# Download Spark binary package from the given URL. +# Ties 3 times with 1s delay # Arguments: url - source URL function download_with_retry() { local url="$1" - wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "${url}" - + curl -O --retry 3 --retry-delay 1 "${url}" if [[ "$?" 
-ne 0 ]]; then echo "3 download attempts for ${url} failed" fi } -SPARK_CACHE="interpreter/spark" -SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" - -mkdir -p "${SPARK_CACHE}" -cd "${SPARK_CACHE}" -if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then - echo "There is no SPARK_HOME in your system." - echo "Zeppelin server will be started after successful downloading ${SPARK_ARCHIVE}" - echo "Download from mirror before starting Zeppelin server..." - MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") - - PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') - PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') +function unzip_spark_bin() { + if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then + echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2 + rm -rf "${SPARK_ARCHIVE}" + else + set_spark_home + fi - download_with_retry "${PREFFERED}${PATHINFO}" -fi + rm -f "${SPARK_ARCHIVE}.tgz" +} -if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then - echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2 - rm -rf "${SPARK_ARCHIVE}" -else +function check_zeppelin_env() { if [[ ! -f "${ZEPPELIN_ENV}" ]]; then echo "${ZEPPELIN_ENV} doesn't exist." echo "Creating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." - cp "${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_ENV}" + cp "${ZEPPELIN_HOME}/${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" fi - export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}" +} - echo "SPARK_HOME is ${SPARK_HOME}" +function set_spark_home() { + local line_num + check_zeppelin_env + export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}" + echo -e "SPARK_HOME is ${SPARK_HOME}\n" # get SPARK_HOME line number in conf/zeppelin-env.sh and substitute to real SPARK_HOME - SPARK_HOME_LINE_NUM=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" | cut -d: -f 1) + line_num=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" | cut -d: -f 1) # save to zeppelin-env.sh.bak temporarily, then remove .bak file - sed -i .bak "${SPARK_HOME_LINE_NUM}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" + sed -i .bak "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" rm "${ZEPPELIN_HOME}/conf/zeppelin-env.sh.bak" -fi +} + +function save_local_spark() { + cd "${SPARK_CACHE}" + echo "There is no local Spark binary in ${ZEPPELIN_HOME}/${SPARK_CACHE}" + + while true; do + read -p "Do you want to download a latest version of Spark binary? 
(Y/N): " answer
+    case $answer in
+      [Yy]* )
+        printf "\nZeppelin server will be started after successful downloading ${SPARK_ARCHIVE}\n"
+        printf "Download ${SPARK_ARCHIVE}.tgz from mirror before starting Zeppelin server...\n\n"
+        MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
+
+        PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
+        PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
+
+        download_with_retry "${PREFFERED}${PATHINFO}"
+        unzip_spark_bin
+        break
+        ;;
+      [Nn]* )
+        echo -e "Your answer is saved under ${SPARK_CACHE}/${SPARK_ARCHIVE}/${ANSWER_FILE} \n"
+        mkdir -p "${SPARK_ARCHIVE}"
+        cd "${SPARK_ARCHIVE}"
+        echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ZEPPELIN_HOME/${SPARK_CACHE}/ or not.
+        \nIf you want to use Spark interpreter in Apache Zeppelin, you need to set your own SPARK_HOME.
+        \nSee http://zeppelin.apache.org/docs/${ZEPPELIN_VERSION}/interpreter/spark.html#configuration for the further details about Spark configuration in Zeppelin.
+        " > "${ANSWER_FILE}"
+        break
+        ;;
+      * )
+        echo "Invalid response. Please re-enter (Y/N):"
+        ;;
+    esac
+  done
+}

-rm -f "${SPARK_HOME}.tgz"
+save_local_spark

 set +xe

From 552185ac03f1b5edc9fabb4d381d471c59078903 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Wed, 7 Sep 2016 16:48:15 +0900
Subject: [PATCH 10/43] Revert: remove spark-dependencies

---
 pom.xml                    |    1 -
 spark-dependencies/pom.xml | 1061 ------------------------------------
 2 files changed, 1062 deletions(-)
 delete mode 100644 spark-dependencies/pom.xml

diff --git a/pom.xml b/pom.xml
index 80b037394c1..d4775a8e9d2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,7 +55,6 @@
     <module>zeppelin-interpreter</module>
     <module>zeppelin-zengine</module>
     <module>zeppelin-display</module>
-    <module>spark-dependencies</module>
     <module>spark</module>
     <module>markdown</module>
     <module>angular</module>
diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml
deleted file mode 100644
index 04b6983fb03..00000000000
--- a/spark-dependencies/pom.xml
+++ /dev/null
@@ -1,1061 +0,0 @@
[The 1,061 deleted lines of spark-dependencies/pom.xml are condensed here to a summary, the XML markup having been destroyed in extraction: the POM declared the org.apache.zeppelin:zeppelin-spark-dependencies_2.10 artifact at 0.7.0-SNAPSHOT ("Zeppelin: Spark dependencies"), default Spark 1.4.1 / Hadoop 2.3.0 / Avro 1.7.7 versions and archive.apache.org download URLs; the avro, jets3t, hadoop-yarn-api/common/server-web-proxy/client, spark-core/repl/sql/hive/streaming/catalyst, hadoop-client, protobuf-java, and Akka dependencies; the spark-1.1 through spark-2.0, cassandra-spark-1.1 through cassandra-spark-1.5, hadoop-0.23 through hadoop-2.7, mapr3 through mapr51, yarn, pyspark, and sparkr profiles; and the enforcer, surefire, shade, and dependency-copy build plugin configuration.]

From ffe64d9b264ab3db67d28a045e34c9c4d471058a Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Wed, 7 Sep 2016 22:23:11 +0900
Subject: [PATCH 11/43] Remove useless ZEPPELIN_HOME

---
 bin/download-spark.cmd | 0
 bin/download-spark.sh  | 2 --
 2 files changed, 2 deletions(-)
 create mode 100644 bin/download-spark.cmd

diff --git a/bin/download-spark.cmd b/bin/download-spark.cmd
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/bin/download-spark.sh b/bin/download-spark.sh
index a3859b1f61c..e94e6ad8990 100644
--- a/bin/download-spark.sh
+++ b/bin/download-spark.sh
@@ -21,8 +21,6 @@ bin=$(cd "${bin}">/dev/null; pwd)

 . "${bin}/common.sh"

-FWDIR="$(dirname "${BASH_SOURCE-$0}")"
-ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)"
 ZEPPELIN_ENV="conf/zeppelin-env.sh"
 ZEPPELIN_ENV_TEMP="conf/zeppelin-env.sh.template"
 ZEPPELIN_VERSION="$(getZeppelinVersion)"

From 5ed33112d64dc3063a29d515d4987e193a909dd0 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Thu, 8 Sep 2016 14:51:40 +0900
Subject: [PATCH 12/43] Change dir of Spark bin to 'local-spark'

---
 bin/common.sh         | 4 +++-
 bin/download-spark.sh | 9 ++++++++-
 pom.xml               | 1 +
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/bin/common.sh b/bin/common.sh
index 6de56492ce3..712465792a7 100644
--- a/bin/common.sh
+++ b/bin/common.sh
@@ -109,12 +109,14 @@ function downloadSparkBinary() {
   local SPARK_VERSION
   local HADOOP_VERSION
+  local SPARK_CACHE
   local SPARK_ARCHIVE
   SPARK_VERSION="2.0.0"
   HADOOP_VERSION="2.7"
+  SPARK_CACHE="local-spark"
   SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"

-  if [[ ! -d "interpreter/spark/${SPARK_ARCHIVE}" ]]; then
+  if [[ ! -d "${SPARK_ARCHIVE}/${SPARK_ARCHIVE}" ]]; then
     . 
"${ZEPPELIN_HOME}/bin/download-spark.sh" fi } diff --git a/bin/download-spark.sh b/bin/download-spark.sh index e94e6ad8990..ffe6620b028 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -28,7 +28,7 @@ ZEPPELIN_VERSION="$(getZeppelinVersion)" SPARK_VERSION="2.0.0" HADOOP_VERSION="2.7" -SPARK_CACHE="interpreter/spark" +SPARK_CACHE="local-spark" SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" ANSWER_FILE="README.txt" @@ -75,7 +75,14 @@ function set_spark_home() { rm "${ZEPPELIN_HOME}/conf/zeppelin-env.sh.bak" } +function create_local_spark_dir() { + if [[ ! -d "${SPARK_CACHE}" ]]; then + mkdir -p "${SPARK_CACHE}" + fi +} + function save_local_spark() { + create_local_spark_dir cd "${SPARK_CACHE}" echo "There is no local Spark binary in ${ZEPPELIN_HOME}/${SPARK_CACHE}" diff --git a/pom.xml b/pom.xml index d4775a8e9d2..7fd0351e971 100644 --- a/pom.xml +++ b/pom.xml @@ -746,6 +746,7 @@ **/run/** **/interpreter/** **/local-repo/** + **/local-spark/** **/null/** **/notebook/** _tools/site/css/* From 1419f0b8d76a8e15ac7646e3827dd536246038d1 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 8 Sep 2016 15:07:20 +0900 Subject: [PATCH 13/43] Set timeout for travis test --- bin/download-spark.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index ffe6620b028..2d3ec516aff 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -87,7 +87,7 @@ function save_local_spark() { echo "There is no local Spark binary in ${ZEPPELIN_HOME}/${SPARK_CACHE}" while true; do - read -p "Do you want to download a latest version of Spark binary? (Y/N): " answer + read -p "Do you want to download a latest version of Spark binary? (Y/N): " -t 20 answer case $answer in [Yy]* ) printf "\nZeppelin server will be started after successful downloading ${SPARK_ARCHIVE}\n" @@ -112,7 +112,8 @@ function save_local_spark() { break ;; * ) - echo "Invalid response. Please re-enter (Y/N):" + echo -e "\nDidn't get any answer in 20 seconds. Please re-start Zeppelin." + exit 0; ;; esac done From a813d922ba29b5c392a908c3199050884266b969 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 8 Sep 2016 15:16:54 +0900 Subject: [PATCH 14/43] Add license header to download-spark.cmd --- bin/download-spark.cmd | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bin/download-spark.cmd b/bin/download-spark.cmd index e69de29bb2d..636d4ce16f9 100644 --- a/bin/download-spark.cmd +++ b/bin/download-spark.cmd @@ -0,0 +1,16 @@ +@echo off + +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM http://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. 
From 368c15aefd650a59c6fb0fdd040efe1bbb2618cc Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 8 Sep 2016 20:48:43 +0900 Subject: [PATCH 15/43] Fix wrong check condition in common.sh --- bin/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/common.sh b/bin/common.sh index 712465792a7..de5a587870a 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -116,7 +116,7 @@ function downloadSparkBinary() { SPARK_CACHE="local-spark" SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" - if [[ ! -d "${SPARK_ARCHIVE}/${SPARK_ARCHIVE}" ]]; then + if [[ ! -d "${SPARK_CACHE}/${SPARK_ARCHIVE}" ]]; then . "${ZEPPELIN_HOME}/bin/download-spark.sh" fi } From e58075d046f65ae173fecc31c0b648b87f445af4 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 8 Sep 2016 22:14:29 +0900 Subject: [PATCH 16/43] Add travis condition to download-spark.sh --- bin/download-spark.sh | 77 ++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 2d3ec516aff..07d7a557a80 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -56,8 +56,7 @@ function unzip_spark_bin() { function check_zeppelin_env() { if [[ ! -f "${ZEPPELIN_ENV}" ]]; then - echo "${ZEPPELIN_ENV} doesn't exist." - echo "Creating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." + echo -e "\n${ZEPPELIN_ENV} doesn't exist\nCreating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." cp "${ZEPPELIN_HOME}/${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" fi } @@ -82,40 +81,52 @@ function create_local_spark_dir() { } function save_local_spark() { - create_local_spark_dir - cd "${SPARK_CACHE}" + local answer + echo "There is no local Spark binary in ${ZEPPELIN_HOME}/${SPARK_CACHE}" while true; do - read -p "Do you want to download a latest version of Spark binary? (Y/N): " -t 20 answer - case $answer in - [Yy]* ) - printf "\nZeppelin server will be started after successful downloading ${SPARK_ARCHIVE}\n" - printf "Download ${SPARK_ARCHIVE}.tgz from mirror before starting Zeppelin server...\n\n" - MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") - - PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') - PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') - - download_with_retry "${PREFFERED}${PATHINFO}" - unzip_spark_bin - break - ;; - [Nn]* ) - echo -e "Your answer is saved under ${SPARK_CACHE}/${SPARK_ARCHIVE}/${ANSWER_FILE} \n" - mkdir -p "${SPARK_ARCHIVE}" - cd "${SPARK_ARCHIVE}" - echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ZEPPELIN_HOME/${SPARK_CACHE}/ or not. - \nIf you want to use Spark interpreter in Apache Zeppelin, you need to set your own SPARK_HOME. - \nSee http://zeppelin.apache.org/docs/${ZEPPELIN_VERSION}/interpreter/spark.html#configuration for the further details about Spark configuration in Zeppelin. - " > "${ANSWER_FILE}" - break - ;; - * ) - echo -e "\nDidn't get any answer in 20 seconds. Please re-start Zeppelin." - exit 0; - ;; - esac + if [[ "${CI}" == "true" ]]; then + break + else + read -p "Do you want to download a latest version of Spark binary? 
(Y/N): " answer + + case "${answer}" in + [Yy]* ) + create_local_spark_dir + cd "${SPARK_CACHE}" + + printf "\nZeppelin server will be started after successful downloading ${SPARK_ARCHIVE}\n" + printf "Download ${SPARK_ARCHIVE}.tgz from mirror before starting Zeppelin server...\n\n" + + MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") + PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') + PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') + + download_with_retry "${PREFFERED}${PATHINFO}" + unzip_spark_bin + break + ;; + [Nn]* ) + create_local_spark_dir + cd "${SPARK_CACHE}" + + echo -e "\nYour answer is saved under ${SPARK_CACHE}/${SPARK_ARCHIVE}/${ANSWER_FILE}" + echo -e "Zeppelin will be started without downloading local Spark binary\n" + + mkdir -p "${SPARK_ARCHIVE}" + + echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ZEPPELIN_HOME/${SPARK_CACHE}/ or not. + \nIf you want to use Spark interpreter in Apache Zeppelin, you need to set your own SPARK_HOME. + \nSee http://zeppelin.apache.org/docs/${ZEPPELIN_VERSION}/interpreter/spark.html#configuration for the further details about Spark configuration in Zeppelin. + " > "${SPARK_ARCHIVE}/${ANSWER_FILE}" + break + ;; + * ) + echo -e "\nInvalid response" + ;; + esac + fi done } From 89be91b049a646b1a0fc7dcfeb5e8bfde68bdab4 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Mon, 12 Sep 2016 14:42:29 +0900 Subject: [PATCH 17/43] Remove bin/download-spark.cmd again --- bin/download-spark.cmd | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 bin/download-spark.cmd diff --git a/bin/download-spark.cmd b/bin/download-spark.cmd deleted file mode 100644 index 636d4ce16f9..00000000000 --- a/bin/download-spark.cmd +++ /dev/null @@ -1,16 +0,0 @@ -@echo off - -REM Licensed to the Apache Software Foundation (ASF) under one or more -REM contributor license agreements. See the NOTICE file distributed with -REM this work for additional information regarding copyright ownership. -REM The ASF licenses this file to You under the Apache License, Version 2.0 -REM (the "License"); you may not use this file except in compliance with -REM the License. You may obtain a copy of the License at -REM -REM http://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, software -REM distributed under the License is distributed on an "AS IS" BASIS, -REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -REM See the License for the specific language governing permissions and -REM limitations under the License. From b22364ddba120842933e96eca1e082680cd5407a Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 13 Sep 2016 01:25:31 +0900 Subject: [PATCH 18/43] Remove spark-dependency profiles & reorganize some titles in README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e12d2aedb51..cace430fac1 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,7 @@ To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](htt ## Getting Started ### Install binary package -Please go to [install](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from binary package. 
+Please refer to the [Zeppelin installation guide](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from the binary package.

 ### Build from source
-Please check [Build from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin from source.
-
-
+Please check [How to build Zeppelin from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin.

From 24dc95faa39586be323365f21a2beb1f683becf8 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Tue, 13 Sep 2016 03:30:41 +0900
Subject: [PATCH 19/43] Update spark.md to add a guide for local-spark mode

---
 docs/interpreter/spark.md | 91 +++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 36 deletions(-)

diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index 44ef4f41814..52c1557ccc7 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -62,7 +62,61 @@ Apache Spark is supported in Zeppelin with Spark interpreter group which consist

 ## Configuration
-The Spark interpreter can be configured with properties provided by Zeppelin.
+There are two Spark modes for using the Spark interpreter in Zeppelin. One is [Local Spark mode](#local-spark-mode) and the other is [system provided Spark mode](#system-provided-spark-mode).
+After you start the Zeppelin server for the first time, you will be asked whether you want to download the latest version of the Spark binary under Zeppelin or not.
+
+```
+$ ./bin/zeppelin-daemon.sh start
+There is no local Spark binary in /ZEPPELIN_HOME/local-spark
+Do you want to download a latest version of Spark binary? (Y/N):
+```
+
+If you are a beginner to Spark and Zeppelin, we recommend that you download [Local Spark](#local-spark-mode) to use the Spark interpreter.
+In this case, you can use the Spark interpreter right after starting Zeppelin, without any configuration.
+Of course you can use an external Spark as well. If so, answer "N" to the question above and see the [System provided Spark mode](#system-provided-spark-mode) section for the next step.
+
+> Please note that Zeppelin doesn't support the local Spark mode on Windows. See the [System provided Spark mode](#system-provided-spark-mode) setup guide after [downloading Spark](http://spark.apache.org/downloads.html).
+
+### Local Spark mode
+No further configuration is needed; you can use the Spark interpreter right after starting Zeppelin.
+`SPARK_HOME` will be set automatically to `ZEPPELIN_HOME/local-spark/spark-x.x.x-bin-hadoopx.x` and exported in `conf/zeppelin-env.sh`.
+
+### System provided Spark mode
+If you want to connect to your Spark cluster, you'll need to follow the two simple steps below.
+
+#### 1. Export SPARK_HOME
+In `conf/zeppelin-env.sh`, export the `SPARK_HOME` environment variable with your Spark installation path.
+
+For example,
+
+```bash
+export SPARK_HOME=/usr/lib/spark
+```
+
+You can optionally export HADOOP\_CONF\_DIR and SPARK\_SUBMIT\_OPTIONS.
+
+```bash
+export HADOOP_CONF_DIR=/usr/lib/hadoop
+export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0"
+```
+
+For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. For more details, please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems).
+
+#### 2. Set master via Interpreter menu
+After starting Zeppelin, go to the **Interpreter** menu and edit the **master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type. 
+For example,
+
+ * **local[*]** in local mode
+ * **spark://master:7077** in standalone cluster
+ * **yarn-client** in Yarn client mode
+ * **mesos://host:5050** in Mesos cluster
+
+That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way. For Spark version compatibility with Zeppelin, please check [this table](https://zeppelin.apache.org/download.html#available-interpreters).
+
+
+### Available properties
+The Spark interpreter can be configured with the properties below, provided by Zeppelin.
[Table of interpreter properties — name, default value, description — not recovered here.]
 You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to [Spark Available Properties](http://spark.apache.org/docs/latest/configuration.html#available-properties). 
-Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you'll need to follow below two simple steps.
-
-### 1. Export SPARK_HOME
-In `conf/zeppelin-env.sh`, export `SPARK_HOME` environment variable with your Spark installation path.
-
-For example,
-
-```bash
-export SPARK_HOME=/usr/lib/spark
-```
-
-You can optionally export `HADOOP_CONF_DIR` and `SPARK_SUBMIT_OPTIONS`
-
-```bash
-export HADOOP_CONF_DIR=/usr/lib/hadoop
-export SPARK_SUBMIT_OPTIONS="--packages com.databricks:spark-csv_2.10:1.2.0"
-```
-
-For Windows, ensure you have `winutils.exe` in `%HADOOP_HOME%\bin`. Please see [Problems running Hadoop on Windows](https://wiki.apache.org/hadoop/WindowsProblems) for the details.
-
-### 2. Set master in Interpreter menu
-After start Zeppelin, go to **Interpreter** menu and edit **master** property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.
-
-For example,
-
- * **local[*]** in local mode
- * **spark://master:7077** in standalone cluster
- * **yarn-client** in Yarn client mode
- * **mesos://host:5050** in Mesos cluster
-
-That's it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way.
-For the further information about Spark & Zeppelin version compatibility, please refer to "Available Interpreters" section in [Zeppelin download page](https://zeppelin.apache.org/download.html).
-
-> Note that without exporting `SPARK_HOME`, it's running in local mode with included version of Spark. The included version may vary depending on the build profile.
-
 ## SparkContext, SQLContext, SparkSession, ZeppelinContext
 SparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext` and `z`, respectively, in Scala, Python and R environments. Starting from 0.6.1 SparkSession is available as variable `spark` when you are using Spark 2.x. 
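The mirror lookup that patches 04, 09, and 16 keep reworking deserves a standalone illustration. Below is a hedged sketch: the curl URL and the grep/sed extraction are lifted from download-spark.sh as it stands in this series, while the sample JSON in the comment is an assumption about the shape `closer.cgi?asjson=1` returns, and PREFERRED/PATH_INFO are renamed locals rather than the script's own variables:

```bash
#!/usr/bin/env bash
# Sketch of the Apache closer.cgi mirror lookup used by download-spark.sh.
# Assumed (abridged) response shape:
#   {"preferred": "http://mirror.example.org/apache/",
#    "path_info": "spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz", ...}
SPARK_VERSION="2.0.0"
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop2.7"

MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")

# Same extraction as the script: grep the line holding the key, then use a
# sed backreference to keep only the quoted value after it.
PREFERRED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
PATH_INFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')

echo "Would download: ${PREFERRED}${PATH_INFO}"
```

The regex-over-JSON approach is kept here for fidelity to the series; a JSON-aware parser would be the more robust choice if the mirror service ever changed its output formatting.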
From 2537fa14d5e13c34be9eeab932bf5dc853bda5d4 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 13 Sep 2016 03:49:49 +0900 Subject: [PATCH 20/43] Remove '-Ppyspark' build options --- .travis.yml | 3 +-- dev/create_release.sh | 4 ++-- dev/publish_release.sh | 2 +- docs/install/virtual_machine.md | 4 ++-- scripts/vagrant/zeppelin-dev/README.md | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 23899679a0b..119f186bec8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,8 +52,7 @@ matrix: # Test spark module for 1.5.2 - jdk: "oraclejdk7" - env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" - + env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test spark module for 1.4.1 - jdk: "oraclejdk7" env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" diff --git a/dev/create_release.sh b/dev/create_release.sh index 272713baa6b..6b8556060d8 100755 --- a/dev/create_release.sh +++ b/dev/create_release.sh @@ -103,8 +103,8 @@ function make_binary_release() { git_clone make_source_package -make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11" -make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell,!bigquery" +make_binary_release all "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Psparkr -Pr -Pscala-2.11" +make_binary_release netinst "-Pspark-2.0 -Phadoop-2.4 -Pyarn -Psparkr -Pr -Pscala-2.11 -pl !alluxio,!angular,!cassandra,!elasticsearch,!file,!flink,!hbase,!ignite,!jdbc,!kylin,!lens,!livy,!markdown,!postgresql,!python,!shell,!bigquery" # remove non release files and dirs rm -rf "${WORKING_DIR}/zeppelin" diff --git a/dev/publish_release.sh b/dev/publish_release.sh index fd1083ac974..2d8900c1fba 100755 --- a/dev/publish_release.sh +++ b/dev/publish_release.sh @@ -44,7 +44,7 @@ NC='\033[0m' # No Color RELEASE_VERSION="$1" GIT_TAG="$2" -PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr" +PUBLISH_PROFILES="-Ppublish-distr -Pspark-2.0 -Phadoop-2.4 -Pyarn -Psparkr -Pr" PROJECT_OPTIONS="-pl !zeppelin-distribution" NEXUS_STAGING="https://repository.apache.org/service/local/staging" NEXUS_PROFILE="153446d1ac37c4" diff --git a/docs/install/virtual_machine.md b/docs/install/virtual_machine.md index 6456bc5de8e..c33edecb301 100644 --- a/docs/install/virtual_machine.md +++ b/docs/install/virtual_machine.md @@ -106,11 +106,11 @@ The virtual machine consists of: ## How to build & run Zeppelin -This assumes you've 
already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR: +This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR: ``` cd /zeppelin -mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests +mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr -DskipTests ./bin/zeppelin-daemon.sh start ``` diff --git a/scripts/vagrant/zeppelin-dev/README.md b/scripts/vagrant/zeppelin-dev/README.md index fd428d69208..4d0e6c7c1d3 100644 --- a/scripts/vagrant/zeppelin-dev/README.md +++ b/scripts/vagrant/zeppelin-dev/README.md @@ -83,11 +83,11 @@ The virtual machine consists of: ### How to build & run Zeppelin -This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR: +This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR: ``` cd /zeppelin -mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests +mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr -DskipTests ./bin/zeppelin-daemon.sh start ``` From ca534e596c36ced04f832b0a7ab7e78e951929e1 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 13 Sep 2016 17:09:18 +0900 Subject: [PATCH 21/43] Remove useless creating .bak file process --- bin/download-spark.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 07d7a557a80..fbe16e69511 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -22,7 +22,7 @@ bin=$(cd "${bin}">/dev/null; pwd) . "${bin}/common.sh" ZEPPELIN_ENV="conf/zeppelin-env.sh" -ZEPPELIN_ENV_TEMP="conf/zeppelin-env.sh.template" +ZEPPELIN_ENV_TEMP="${ZEPPELIN_ENV}.template" ZEPPELIN_VERSION="$(getZeppelinVersion)" SPARK_VERSION="2.0.0" @@ -39,7 +39,7 @@ function download_with_retry() { local url="$1" curl -O --retry 3 --retry-delay 1 "${url}" if [[ "$?" -ne 0 ]]; then - echo "3 download attempts for ${url} failed" + echo -e "3 download attempts for ${url} failed.\nPlease restart Zeppelin if you want to download local Spark again." 
fi
 }

@@ -68,10 +68,8 @@ function set_spark_home() {
   echo -e "SPARK_HOME is ${SPARK_HOME}\n"

   # get SPARK_HOME line number in conf/zeppelin-env.sh and substitute to real SPARK_HOME
-  line_num=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh" | cut -d: -f 1)
-  # save to zeppelin-env.sh.bak temporarily, then remove .bak file
-  sed -i .bak "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/conf/zeppelin-env.sh"
-  rm "${ZEPPELIN_HOME}/conf/zeppelin-env.sh.bak"
+  line_num=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" | cut -d: -f 1)
+  sed -i "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}"
 }

From edd525d0f6eac0a956bc64f58e77ac3afc423f58 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Tue, 13 Sep 2016 20:21:10 +0900
Subject: [PATCH 22/43] Update install.md & spark.md

---
 docs/install/install.md   | 14 +++++++++++++-
 docs/interpreter/spark.md |  2 +-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/docs/install/install.md b/docs/install/install.md
index 4d8965714db..04a485aa476 100644
--- a/docs/install/install.md
+++ b/docs/install/install.md
@@ -79,6 +79,18 @@ After Zeppelin has started successfully, go to [http://localhost:8080](http://lo
 bin/zeppelin-daemon.sh stop
 ```

+#### Getting the latest version of Apache Spark binary
+
+If you are a beginner to Spark and Zeppelin, we recommend that you download **Zeppelin embedded Spark** to use the Spark interpreter.
+In this case, you can get the latest version of Spark with the command below.
+
+```
+./bin/zeppelin-daemon.sh get-spark
+```
+
+The downloaded Spark binary will be saved under `ZEPPELIN_HOME/local-spark/`.
+Please check [Local Spark mode in Zeppelin](../interpreter/spark.html#local-spark-mode) for more information.
+
 ## Next Steps

 Congratulations, you have successfully installed Apache Zeppelin! Here are few steps you might find useful:

@@ -377,4 +389,4 @@ exec bin/zeppelin-daemon.sh upstart

 ## Building from Source

-If you want to build from source instead of using binary package, follow the instructions [here](./build.html).
\ No newline at end of file
+If you want to build from source instead of using binary package, follow the instructions [here](./build.html).
diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index 52c1557ccc7..ee28b9d36c9 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -62,7 +62,7 @@ Apache Spark is supported in Zeppelin with Spark interpreter group which consist

 ## Configuration
-There are two Spark modes for using the Spark interpreter in Zeppelin. One is [Local Spark mode](#local-spark-mode) and the other is [system provided Spark mode](#system-provided-spark-mode).
+There are two Spark modes for using the Spark interpreter in Zeppelin. One is [local Spark mode](#local-spark-mode) and the other is [system provided Spark mode](#system-provided-spark-mode).
 After you start the Zeppelin server for the first time, you will be asked whether you want to download the latest version of the Spark binary under Zeppelin or not. 
``` From a9b110a809463ac1795e76a30b9cd2df6c40292d Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Wed, 14 Sep 2016 18:35:37 +0900 Subject: [PATCH 23/43] Resolve 'sed' command issue between OSX & Linux --- bin/download-spark.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index fbe16e69511..3fc0df43854 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -69,7 +69,14 @@ function set_spark_home() { # get SPARK_HOME line number in conf/zeppelin-env.sh and substitute to real SPARK_HOME line_num=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" | cut -d: -f 1) - sed -i "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" + + # sed command with -i option fails on OSX, but works on Linux + # '-ie' will resolve this issue but create useless 'zeppelin-env.she' file + sed -ie "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" + + if [ -f "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}e" ]; then + rm "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}e" + fi } function create_local_spark_dir() { From f383d3afb8f9e2c1e240f69d8d970c469d0a9ced Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Wed, 14 Sep 2016 20:20:31 +0900 Subject: [PATCH 24/43] Trap ctrl+c during downloading Spark --- bin/download-spark.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 3fc0df43854..1c0bf8c875c 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -35,11 +35,15 @@ ANSWER_FILE="README.txt" # Download Spark binary package from the given URL. # Ties 3 times with 1s delay # Arguments: url - source URL + +trap "download_with_retry; rm -r ${ZEPPELIN_HOME}/${SPARK_CACHE}; exit 1" SIGTERM SIGINT SIGQUIT + function download_with_retry() { local url="$1" curl -O --retry 3 --retry-delay 1 "${url}" - if [[ "$?" -ne 0 ]]; then - echo -e "3 download attempts for ${url} failed.\nPlease restart Zeppelin if you want to download local Spark again." + + if [[ "$?" -ne 0 || -z "${url}" ]]; then + echo -e "\nStop downloading with unexpected error.\nPlease restart Zeppelin if you want to download local Spark again." fi } @@ -121,7 +125,7 @@ function save_local_spark() { mkdir -p "${SPARK_ARCHIVE}" - echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ZEPPELIN_HOME/${SPARK_CACHE}/ or not. + echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ${ZEPPELIN_HOME}/${SPARK_CACHE}/ or not. \nIf you want to use Spark interpreter in Apache Zeppelin, you need to set your own SPARK_HOME. \nSee http://zeppelin.apache.org/docs/${ZEPPELIN_VERSION}/interpreter/spark.html#configuration for the further details about Spark configuration in Zeppelin. " > "${SPARK_ARCHIVE}/${ANSWER_FILE}" From 527ef5b6518d3477d9731422cad190a59df11d1e Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Wed, 14 Sep 2016 20:26:56 +0900 Subject: [PATCH 25/43] Remove useless condition --- bin/download-spark.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 1c0bf8c875c..c16ff6d8aeb 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -42,7 +42,7 @@ function download_with_retry() { local url="$1" curl -O --retry 3 --retry-delay 1 "${url}" - if [[ "$?" -ne 0 || -z "${url}" ]]; then + if [[ "$?" 
-ne 0 ]]; then echo -e "\nStop downloading with unexpected error.\nPlease restart Zeppelin if you want to download local Spark again." fi } From 555372a655b788b3b0fdd85d430b6f063ce13834 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Wed, 21 Sep 2016 02:05:16 +0900 Subject: [PATCH 26/43] Make local spark mode with zero-configuration as @moon suggested --- bin/common.sh | 15 ++++++--------- bin/download-spark.sh | 40 +++++++++------------------------------- bin/interpreter.sh | 32 ++++++++++++++++---------------- 3 files changed, 31 insertions(+), 56 deletions(-) diff --git a/bin/common.sh b/bin/common.sh index de5a587870a..eea24634037 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -106,16 +106,13 @@ function getZeppelinVersion() { exit 0 } -function downloadSparkBinary() { - local SPARK_VERSION - local HADOOP_VERSION - local SPARK_CACHE - local SPARK_ARCHIVE - SPARK_VERSION="2.0.0" - HADOOP_VERSION="2.7" - SPARK_CACHE="local-spark" - SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" +SPARK_VERSION="2.0.0" +HADOOP_VERSION="2.7" +SPARK_CACHE="local-spark" +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" + +function downloadSparkBinary() { if [[ ! -d "${SPARK_CACHE}/${SPARK_ARCHIVE}" ]]; then . "${ZEPPELIN_HOME}/bin/download-spark.sh" fi diff --git a/bin/download-spark.sh b/bin/download-spark.sh index c16ff6d8aeb..0ade495884f 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -35,9 +35,6 @@ ANSWER_FILE="README.txt" # Download Spark binary package from the given URL. # Ties 3 times with 1s delay # Arguments: url - source URL - -trap "download_with_retry; rm -r ${ZEPPELIN_HOME}/${SPARK_CACHE}; exit 1" SIGTERM SIGINT SIGQUIT - function download_with_retry() { local url="$1" curl -O --retry 3 --retry-delay 1 "${url}" @@ -51,36 +48,10 @@ function unzip_spark_bin() { if ! tar zxf "${SPARK_ARCHIVE}.tgz" ; then echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2 rm -rf "${SPARK_ARCHIVE}" - else - set_spark_home fi rm -f "${SPARK_ARCHIVE}.tgz" -} - -function check_zeppelin_env() { - if [[ ! -f "${ZEPPELIN_ENV}" ]]; then - echo -e "\n${ZEPPELIN_ENV} doesn't exist\nCreating ${ZEPPELIN_ENV} from ${ZEPPELIN_ENV_TEMP}..." 
- cp "${ZEPPELIN_HOME}/${ZEPPELIN_ENV_TEMP}" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" - fi -} - -function set_spark_home() { - local line_num - check_zeppelin_env - export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}" - echo -e "SPARK_HOME is ${SPARK_HOME}\n" - - # get SPARK_HOME line number in conf/zeppelin-env.sh and substitute to real SPARK_HOME - line_num=$(grep -n "export SPARK_HOME" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" | cut -d: -f 1) - - # sed command with -i option fails on OSX, but works on Linux - # '-ie' will resolve this issue but create useless 'zeppelin-env.she' file - sed -ie "${line_num}s|.*|export SPARK_HOME=\"${SPARK_HOME}\"|g" "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}" - - if [ -f "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}e" ]; then - rm "${ZEPPELIN_HOME}/${ZEPPELIN_ENV}e" - fi + echo -e "\n${SPARK_ARCHIVE} is successfully downloaded and saved under ${ZEPPELIN_HOME}/${SPARK_CACHE}\n" } function create_local_spark_dir() { @@ -89,11 +60,18 @@ function create_local_spark_dir() { fi } +function check_local_spark_dir() { + if [[ -d "${ZEPPELIN_HOME}/${SPARK_CACHE}" ]]; then + rm -r "${ZEPPELIN_HOME}/${SPARK_CACHE}" + fi +} + function save_local_spark() { local answer - echo "There is no local Spark binary in ${ZEPPELIN_HOME}/${SPARK_CACHE}" + echo -e "For using Spark interpreter in local mode(without external Spark installation), Spark binary needs to be downloaded." + trap "echo -e '\n\nForced termination by user. Please restart Zeppelin again.'; check_local_spark_dir; exit 1" SIGTERM SIGINT SIGQUIT while true; do if [[ "${CI}" == "true" ]]; then break diff --git a/bin/interpreter.sh b/bin/interpreter.sh index 38bddb7fae7..0156e851bc7 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -20,7 +20,7 @@ bin=$(dirname "${BASH_SOURCE-$0}") bin=$(cd "${bin}">/dev/null; pwd) function usage() { - echo "usage) $0 -p -d -l " + echo "usage) $0 -p -d -l " } while getopts "hp:d:l:v:u:" o; do @@ -55,8 +55,8 @@ done if [ -z "${PORT}" ] || [ -z "${INTERPRETER_DIR}" ]; then - usage - exit 1 + usage + exit 1 fi . 
"${bin}/common.sh" @@ -98,17 +98,18 @@ fi # set spark related env variables if [[ "${INTERPRETER_ID}" == "spark" ]]; then + SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)" + # This will eventually passes SPARK_APP_JAR to classpath of SparkIMain + ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}" + if [[ -n "${SPARK_HOME}" ]]; then export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit" - SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)" - # This will eventually passes SPARK_APP_JAR to classpath of SparkIMain - ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}" - pattern="$SPARK_HOME/python/lib/py4j-*-src.zip" + pattern="${SPARK_HOME}/python/lib/py4j-*-src.zip" py4j=($pattern) # pick the first match py4j zip - there should only be one - export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH" - export PYTHONPATH="${py4j[0]}:$PYTHONPATH" + export PYTHONPATH="${SPARK_HOME}/python/:${PYTHONPATH}" + export PYTHONPATH="${py4j[0]}:${PYTHONPATH}" else # add Hadoop jars into classpath if [[ -n "${HADOOP_HOME}" ]]; then @@ -120,12 +121,13 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then addJarInDirForIntp "${HADOOP_HOME}/lib" fi - addJarInDirForIntp "${INTERPRETER_DIR}/dep" + # If there is not SPARK_HOME in the system, Zeppelin will use local Spark binary for Spark interpreter + export SPARK_SUBMIT="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}/bin/spark-submit" - pattern="${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-*-src.zip" + pattern="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}/python/lib/py4j-*-src.zip" py4j=($pattern) # pick the first match py4j zip - there should only be one - PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${py4j[0]}" + PYSPARKPATH="${ZEPPELIN_HOME}/${SPARK_CACHE}/${SPARK_ARCHIVE}/python/lib/pyspark.zip:${py4j[0]}" if [[ -z "${PYTHONPATH}" ]]; then export PYTHONPATH="${PYSPARKPATH}" @@ -146,8 +148,6 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" fi - - export SPARK_CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" fi elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then if [[ -n "${HBASE_CONF_DIR}" ]]; then @@ -186,9 +186,9 @@ addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}" CLASSPATH+=":${ZEPPELIN_INTP_CLASSPATH}" if [[ -n "${SPARK_SUBMIT}" ]]; then - ${ZEPPELIN_IMPERSONATE_RUN_CMD} `${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &` + ${ZEPPELIN_IMPERSONATE_RUN_CMD} `${SPARK_SUBMIT} --class ${ZEPPELIN_SERVER} --driver-class-path "${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH}" --driver-java-options "${JAVA_INTP_OPTS}" ${SPARK_SUBMIT_OPTIONS} ${SPARK_APP_JAR} ${PORT} &` else - ${ZEPPELIN_IMPERSONATE_RUN_CMD} ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} & + ${ZEPPELIN_IMPERSONATE_RUN_CMD} ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} ${ZEPPELIN_INTP_MEM} -cp ${ZEPPELIN_INTP_CLASSPATH_OVERRIDES}:${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} & fi pid=$! 
From de87cb2adf5ad510a712e4f696ae127c7a414077 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Thu, 22 Sep 2016 23:20:31 +0900 Subject: [PATCH 27/43] Modify SparkRInterpreter.java to enable SparkR without SPARK_HOME --- .../org/apache/zeppelin/spark/SparkRInterpreter.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java index 15ce6581698..9079680eec2 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java @@ -53,12 +53,18 @@ public void open() { String rCmdPath = getProperty("zeppelin.R.cmd"); String sparkRLibPath; + // See bin/download-spark.sh for the below env variables + String sparkchachePath = System.getenv("SPARK_CACHE"); + String sparkArchivePath = System.getenv("SPARK_ARCHIVE"); + if (System.getenv("SPARK_HOME") != null) { sparkRLibPath = System.getenv("SPARK_HOME") + "/R/lib"; } else { - sparkRLibPath = System.getenv("ZEPPELIN_HOME") + "/interpreter/spark/R/lib"; + sparkRLibPath = System.getenv("ZEPPELIN_HOME") + + sparkchachePath + sparkArchivePath + "/R/lib"; // workaround to make sparkr work without SPARK_HOME - System.setProperty("spark.test.home", System.getenv("ZEPPELIN_HOME") + "/interpreter/spark"); + System.setProperty("spark.test.home", System.getenv("ZEPPELIN_HOME") + + sparkchachePath + sparkArchivePath + "/R/lib"); } synchronized (SparkRBackend.backend()) { From 1dd51d8e1dcb8d65e22a1cc67a5d089c5d7c196b Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Fri, 23 Sep 2016 02:01:40 +0900 Subject: [PATCH 28/43] Remove duplicated variable declaration --- bin/download-spark.sh | 5 ----- .../java/org/apache/zeppelin/spark/SparkRInterpreter.java | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index 0ade495884f..bf3f1475fec 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -25,11 +25,6 @@ ZEPPELIN_ENV="conf/zeppelin-env.sh" ZEPPELIN_ENV_TEMP="${ZEPPELIN_ENV}.template" ZEPPELIN_VERSION="$(getZeppelinVersion)" -SPARK_VERSION="2.0.0" -HADOOP_VERSION="2.7" - -SPARK_CACHE="local-spark" -SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" ANSWER_FILE="README.txt" # Download Spark binary package from the given URL. 
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java index 9079680eec2..d529999b4b2 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java @@ -53,7 +53,7 @@ public void open() { String rCmdPath = getProperty("zeppelin.R.cmd"); String sparkRLibPath; - // See bin/download-spark.sh for the below env variables + // SPARK_CACHE and SPARK_ARCHIVE are defined in bin/common.sh String sparkchachePath = System.getenv("SPARK_CACHE"); String sparkArchivePath = System.getenv("SPARK_ARCHIVE"); From f068bef554507e7125865f77816986d5b085a7b3 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Fri, 23 Sep 2016 02:02:01 +0900 Subject: [PATCH 29/43] Update related docs again --- docs/interpreter/spark.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index ee28b9d36c9..fa80c3d230c 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -67,7 +67,7 @@ After you start Zeppelin server for the first time, you will be asked whether yo ``` $ ./bin/zeppelin-daemon.sh start -There is no local Spark binary in /ZEPPELIN_HOME/local-spark +For using Spark interpreter in local mode(without external Spark installation), Spark binary needs to be downloaded. Do you want to download a latest version of Spark binary? (Y/N): ``` @@ -79,7 +79,6 @@ Of course you can use external Spark as well. If so, enter "N" to the above ques ### Local Spark mode Nothing needs to be more configured. You can use Spark interpreter right after starting Zeppelin. -`SPARK_HOME` will be set automatically as `ZEPPELIN_HOME/local-spark/spark-x.x.x-hadoop.x.x` and exported in `conf/zeppelin-env.sh`. ### System provided Spark mode If you want to connect to your Spark cluster, you'll need to follow below two simple steps. 
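The system provided Spark mode described in the docs above comes down to one export in `conf/zeppelin-env.sh`. A minimal sketch (the installation path is a placeholder, not taken from the patch):

```bash
# conf/zeppelin-env.sh
# Point Zeppelin at an existing Spark installation; bin/interpreter.sh then
# launches the Spark interpreter process via ${SPARK_HOME}/bin/spark-submit.
export SPARK_HOME=/opt/spark-2.0.1-bin-hadoop2.7

# Optional: extra flags handed straight to spark-submit.
# export SPARK_SUBMIT_OPTIONS="--driver-memory 512M --executor-memory 1G"
```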
From 437f2063a39d2a7a583bb647cb885e51a0990098 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Fri, 23 Sep 2016 14:37:57 +0900 Subject: [PATCH 30/43] Fix typo in SparkRInterpreter.java --- .../java/org/apache/zeppelin/spark/SparkRInterpreter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java index d529999b4b2..12990fe147e 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkRInterpreter.java @@ -54,17 +54,17 @@ public void open() { String sparkRLibPath; // SPARK_CACHE and SPARK_ARCHIVE are defined in bin/common.sh - String sparkchachePath = System.getenv("SPARK_CACHE"); + String sparkCachePath = System.getenv("SPARK_CACHE"); String sparkArchivePath = System.getenv("SPARK_ARCHIVE"); if (System.getenv("SPARK_HOME") != null) { sparkRLibPath = System.getenv("SPARK_HOME") + "/R/lib"; } else { sparkRLibPath = System.getenv("ZEPPELIN_HOME") - + sparkchachePath + sparkArchivePath + "/R/lib"; + + sparkCachePath + sparkArchivePath + "/R/lib"; // workaround to make sparkr work without SPARK_HOME System.setProperty("spark.test.home", System.getenv("ZEPPELIN_HOME") - + sparkchachePath + sparkArchivePath + "/R/lib"); + + sparkCachePath + sparkArchivePath + "/R/lib"); } synchronized (SparkRBackend.backend()) { From 6caef52c37b844f047c11a678f5162a2eba8f374 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Mon, 17 Oct 2016 17:38:04 +0900 Subject: [PATCH 31/43] Fix rebasing mistake --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 119f186bec8..d5f44f9de94 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,6 +53,7 @@ matrix: # Test spark module for 1.5.2 - jdk: "oraclejdk7" env: SCALA_VER="2.10" SPARK_VER="1.5.2" HADOOP_VER="2.3" PROFILE="-Pspark-1.5 -Pr -Phadoop-2.3 -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + # Test spark module for 1.4.1 - jdk: "oraclejdk7" env: SCALA_VER="2.10" SPARK_VER="1.4.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.4 -Pr -Phadoop-2.3 -Psparkr" BUILD_FLAG="package -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark,r -Dtest=org.apache.zeppelin.rest.*Test,org.apache.zeppelin.spark.* -DfailIfNoTests=false" From d8e3aba4f47ed629b4db8e8e962c2ad466059910 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Mon, 24 Oct 2016 23:26:58 +0900 Subject: [PATCH 32/43] Add get-spark option instead of getting user's answer --- bin/download-spark.sh | 63 ++++++++++-------------------------------- bin/zeppelin-daemon.sh | 6 ++-- bin/zeppelin.sh | 6 ++-- 3 files changed, 22 insertions(+), 53 deletions(-) diff --git a/bin/download-spark.sh b/bin/download-spark.sh index bf3f1475fec..39786d510d6 100644 --- a/bin/download-spark.sh +++ b/bin/download-spark.sh @@ -35,7 +35,7 @@ function download_with_retry() { curl -O --retry 3 --retry-delay 1 "${url}" if [[ "$?" -ne 0 ]]; then - echo -e "\nStop downloading with unexpected error.\nPlease restart Zeppelin if you want to download local Spark again." + echo -e "\nStop downloading with unexpected error." 
fi } @@ -62,54 +62,19 @@ function check_local_spark_dir() { } function save_local_spark() { - local answer - - echo -e "For using Spark interpreter in local mode(without external Spark installation), Spark binary needs to be downloaded." - - trap "echo -e '\n\nForced termination by user. Please restart Zeppelin again.'; check_local_spark_dir; exit 1" SIGTERM SIGINT SIGQUIT - while true; do - if [[ "${CI}" == "true" ]]; then - break - else - read -p "Do you want to download a latest version of Spark binary? (Y/N): " answer - - case "${answer}" in - [Yy]* ) - create_local_spark_dir - cd "${SPARK_CACHE}" - - printf "\nZeppelin server will be started after successful downloading ${SPARK_ARCHIVE}\n" - printf "Download ${SPARK_ARCHIVE}.tgz from mirror before starting Zeppelin server...\n\n" - - MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") - PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') - PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') - - download_with_retry "${PREFFERED}${PATHINFO}" - unzip_spark_bin - break - ;; - [Nn]* ) - create_local_spark_dir - cd "${SPARK_CACHE}" - - echo -e "\nYour answer is saved under ${SPARK_CACHE}/${SPARK_ARCHIVE}/${ANSWER_FILE}" - echo -e "Zeppelin will be started without downloading local Spark binary\n" - - mkdir -p "${SPARK_ARCHIVE}" - - echo -e "Please note that you answered 'No' when we asked whether you want to download local Spark binary under ${ZEPPELIN_HOME}/${SPARK_CACHE}/ or not. - \nIf you want to use Spark interpreter in Apache Zeppelin, you need to set your own SPARK_HOME. - \nSee http://zeppelin.apache.org/docs/${ZEPPELIN_VERSION}/interpreter/spark.html#configuration for the further details about Spark configuration in Zeppelin. - " > "${SPARK_ARCHIVE}/${ANSWER_FILE}" - break - ;; - * ) - echo -e "\nInvalid response" - ;; - esac - fi - done + # echo -e "For using Spark interpreter in local mode(without external Spark installation), Spark binary needs to be downloaded." 
+ trap "echo -e '\n\nForced termination by user.'; check_local_spark_dir; exit 1" SIGTERM SIGINT SIGQUIT + create_local_spark_dir + cd "${SPARK_CACHE}" + + printf "Download ${SPARK_ARCHIVE}.tgz from mirror ...\n\n" + + MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") + PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') + PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') + + download_with_retry "${PREFFERED}${PATHINFO}" + unzip_spark_bin } save_local_spark diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index 0bdd56aecff..c3e660da07c 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -20,7 +20,7 @@ # USAGE="-e Usage: zeppelin-daemon.sh\n\t - [--config ] {start|stop|upstart|restart|reload|status}\n\t + [--config ] {start|stop|upstart|restart|reload|status|get-spark}\n\t [--version | -v]" if [[ "$1" == "--config" ]]; then @@ -177,7 +177,6 @@ function start() { fi fi - downloadSparkBinary initialize_default_directories echo "ZEPPELIN_CLASSPATH: ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" >> "${ZEPPELIN_OUTFILE}" @@ -266,6 +265,9 @@ case "${1}" in status) find_zeppelin_process ;; + get-spark) + downloadSparkBinary + ;; -v | --version) getZeppelinVersion ;; diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index 1a28c2cde17..c59def79214 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -43,6 +43,10 @@ if [ "$1" == "--version" ] || [ "$1" == "-v" ]; then getZeppelinVersion fi +if [ "$1" == "get-spark" ]; then + downloadSparkBinary +fi + HOSTNAME=$(hostname) ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/zeppelin-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.log" LOG="${ZEPPELIN_LOG_DIR}/zeppelin-cli-${ZEPPELIN_IDENT_STRING}-${HOSTNAME}.out" @@ -72,8 +76,6 @@ addJarInDir "${ZEPPELIN_HOME}/zeppelin-web/target/lib" CLASSPATH+=":${ZEPPELIN_CLASSPATH}" -downloadSparkBinary - if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then echo "Log dir doesn't exist, create ${ZEPPELIN_LOG_DIR}" $(mkdir -p "${ZEPPELIN_LOG_DIR}") From e97d6bc5456d767d2f382e8f3b4aa9b75ee734fa Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 25 Oct 2016 00:32:32 +0900 Subject: [PATCH 33/43] Check the existence of local-spark --- bin/common.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/common.sh b/bin/common.sh index eea24634037..c7b61ecedf0 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -115,6 +115,8 @@ SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" function downloadSparkBinary() { if [[ ! -d "${SPARK_CACHE}/${SPARK_ARCHIVE}" ]]; then . "${ZEPPELIN_HOME}/bin/download-spark.sh" + else + echo -e "${SPARK_ARCHIVE} already exists under local-spark." 
fi } From 4240e756e951d531c67a5d51143927532a4241e2 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Mon, 31 Oct 2016 19:14:19 +0900 Subject: [PATCH 34/43] Update Spark version to 2.0.1 --- bin/common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/common.sh b/bin/common.sh index c7b61ecedf0..e00d83e93d2 100644 --- a/bin/common.sh +++ b/bin/common.sh @@ -107,7 +107,7 @@ function getZeppelinVersion() { } -SPARK_VERSION="2.0.0" +SPARK_VERSION="2.0.1" HADOOP_VERSION="2.7" SPARK_CACHE="local-spark" SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" From 3f4bea843e70d3ab40e3beb1d5674333430ab465 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 8 Nov 2016 11:13:07 +0900 Subject: [PATCH 35/43] Fix travis CI failure as @astroshim suggested --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index d5f44f9de94..4865ae46a2a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -77,6 +77,7 @@ install: before_script: - travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER + - export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER - echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh - tail conf/zeppelin-env.sh From 8df7e24b6626c45f2f591da0f593df6d61fd5018 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Tue, 8 Nov 2016 18:14:12 +0900 Subject: [PATCH 36/43] Update related docs pages --- docs/install/upgrade.md | 7 ++++--- docs/interpreter/spark.md | 26 +++++++++++++++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/docs/install/upgrade.md b/docs/install/upgrade.md index a29ed8db327..5d3c3305f4e 100644 --- a/docs/install/upgrade.md +++ b/docs/install/upgrade.md @@ -48,9 +48,10 @@ So, copying `notebook` and `conf` directory should be enough. ## Migration Guide ### Upgrading from Zeppelin 0.6 to 0.7 - - - From 0.7, we don't use `ZEPPELIN_JAVA_OPTS` as default value of `ZEPPELIN_INTP_JAVA_OPTS` and also the same for `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If user want to configure the jvm opts of interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly. If you don't set `ZEPPELIN_INTP_MEM`, Zeppelin will set it to `-Xms1024m -Xmx1024m -XX:MaxPermSize=512m` by default. + - From 0.7, the support on Spark 1.1.x to 1.3.x is deprecated. + - Zeppelin embedded Spark won't work anymore. You need to run `./bin/zeppelin-daemon.sh get-spark` or `./bin/zeppelin.sh get-spark` at least one time. Please see [local Spark mode](../interpreter/spark.html#local-spark-mode) for more detailed information. + - We don't use `ZEPPELIN_JAVA_OPTS` as default value of `ZEPPELIN_INTP_JAVA_OPTS` and also the same for `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If user want to configure the jvm opts of interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly. If you don't set `ZEPPELIN_INTP_MEM`, Zeppelin will set it to `-Xms1024m -Xmx1024m -XX:MaxPermSize=512m` by default. - Mapping from `%jdbc(prefix)` to `%prefix` is no longer available. Instead, you can use %[interpreter alias] with multiple interpreter setttings on GUI. - Usage of `ZEPPELIN_PORT` is not supported in ssl mode. Instead use `ZEPPELIN_SSL_PORT` to configure the ssl port. Value from `ZEPPELIN_PORT` is used only when `ZEPPELIN_SSL` is set to `false`. - The support on Spark 1.1.x to 1.3.x is deprecated. - - From 0.7, we uses `pegdown` as the `markdown.parser.type` option for the `%md` interpreter. 
Rendered markdown might be different from what you expected
+ - `pegdown` will be used as the `markdown.parser.type` option for the `%md` interpreter. Rendered markdown might be different from what you expected.
diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md
index fa80c3d230c..1d837b6889c 100644
--- a/docs/interpreter/spark.md
+++ b/docs/interpreter/spark.md
@@ -63,25 +63,29 @@ Apache Spark is supported in Zeppelin with Spark interpreter group which consist

 ## Configuration

 There are two Spark modes to use Spark interpreter in Zeppelin. One is [local Spark mode](#local-spark-mode) and the other is [system provided Spark mode](#system-provided-spark-mode).
-After you start Zeppelin server for the first time, you will be asked whether you want to download the latest version of Spark binary under Zeppelin or not.
+
+### Local Spark mode
+If you are a beginner to Spark and Zeppelin, we would recommend you to download [Local Spark](#local-spark-mode) for using Spark interpreter.
+In this case, you can download the latest version of Spark with below command.

 ```
-$ ./bin/zeppelin-daemon.sh start
-For using Spark interpreter in local mode(without external Spark installation), Spark binary needs to be downloaded.
-Do you want to download a latest version of Spark binary? (Y/N):
+./bin/zeppelin-daemon.sh get-spark
 ```

-If you are a beginner to Spark and Zeppelin, we recommend you to download [Local Spark](#local-spark-mode) for using Spark interpreter.
-In this case, you can use Spark interpreter right after starting Zeppelin without any configurations.
-Of course you can use external Spark as well. If so, enter "N" to the above question and see [System provided Spark mode](#system-provided-spark-mode) section for the next step.
+or

-> Please note that Zeppelin doesn't support the local Spark mode for Windows. See [System provided Spark mode](#system-provided-spark-mode) setting guide after [downloading Spark](http://spark.apache.org/downloads.html).
+```
+./bin/zeppelin.sh get-spark
+```
+
+Then the downloaded Spark will be saved under `ZEPPELIN_HOME/local-spark/`.
+Nothing more needs to be configured to use this local Spark (e.g. setting `SPARK_HOME` or `HADOOP_HOME`).
+
+> Please note that Zeppelin doesn't support the local Spark mode for Windows. See [System provided Spark mode](#system-provided-spark-mode) setting guide after [downloading Spark](http://spark.apache.org/downloads.html).

 ### System provided Spark mode
-If you want to connect to your Spark cluster, you'll need to follow below two simple steps.
+Of course you can use external Spark as well.
+If you want to connect to your Spark cluster for your own usage, you'll need to follow the two simple steps below.

 #### 1. Export SPARK_HOME
 In `conf/zeppelin-env.sh`, export `SPARK_HOME` environment variable with your Spark installation path.
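Putting this commit's pieces together, the local Spark flow looks roughly like this from a shell. Directory names follow `bin/common.sh`, where `SPARK_CACHE="local-spark"` and `SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"`; the listed archive name assumes the defaults at this point in the series (Spark 2.0.1, Hadoop 2.7):

```bash
# One-time download; afterwards neither SPARK_HOME nor HADOOP_HOME is needed.
./bin/zeppelin-daemon.sh get-spark

# The tarball is unpacked under ZEPPELIN_HOME/local-spark/, e.g.:
ls local-spark/
# spark-2.0.1-bin-hadoop2.7

./bin/zeppelin-daemon.sh start
```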
From 5680fed245e3f46e79d4522edaf2dddef790cd6d Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Tue, 8 Nov 2016 18:26:10 +0900
Subject: [PATCH 37/43] Fix typos

---
 README.md                 | 283 ++++++++++++++++++++++++++++++++++++++
 docs/interpreter/spark.md |   2 +-
 2 files changed, 284 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cace430fac1..183757405b9 100644
--- a/README.md
+++ b/README.md
@@ -23,5 +23,288 @@ To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](htt
 ### Install binary package
 Please refet to [Zeppelin installation guide](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from binary package.
+<<<<<<< 380fd31b57436f6526880a6bd74e29b8a1531adf
 ### Build from source
 Please check [How to build Zeppelin from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin.
+=======
+#### Install maven
+```
+wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
+sudo tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/
+sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
+```
+
+_Notes:_
+ - Ensure node is installed by running `node --version`
+ - Ensure maven is running version 3.1.x or higher with `mvn -version`
+ - Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"`
+
+#### Proxy settings (optional)
+First of all, set your proxy configuration on Maven `settings.xml`.
+```
+<settings>
+  <proxies>
+    <proxy>
+      <id>proxy-http</id>
+      <active>true</active>
+      <protocol>http</protocol>
+      <host>localhost</host>
+      <port>3128</port>
+      <!-- <username>usr</username> <password>pwd</password> -->
+      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
+    </proxy>
+    <proxy>
+      <id>proxy-https</id>
+      <active>true</active>
+      <protocol>https</protocol>
+      <host>localhost</host>
+      <port>3128</port>
+      <!-- <username>usr</username> <password>pwd</password> -->
+      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
+    </proxy>
+  </proxies>
+</settings>
+```
+
+Then, run these commands from shell.
+```
+npm config set proxy http://localhost:3128
+npm config set https-proxy http://localhost:3128
+npm config set registry "http://registry.npmjs.org/"
+npm config set strict-ssl false
+git config --global http.proxy http://localhost:3128
+git config --global https.proxy http://localhost:3128
+git config --global url."http://".insteadOf git://
+```
+
+Cleanup: set `active false` in Maven `settings.xml` and run these commands.
+```
+npm config rm proxy
+npm config rm https-proxy
+git config --global --unset http.proxy
+git config --global --unset https.proxy
+git config --global --unset url."http://".insteadOf
+```
+
+_Notes:_
+ - If you are behind NTLM proxy you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/).
+ - Replace `localhost:3128` with the standard pattern `http://user:pwd@host:port`.
+
+## Build
+If you want to build Zeppelin from the source, please first clone this repository, then:
+
+```
+mvn clean package -DskipTests [Options]
+```
+
+Each Interpreter requires different Options.
+ + +### Spark Interpreter + +To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options: + +##### `-Pspark-[version]` + +Set spark major version + +Available profiles are + +``` +-Pspark-2.0 +-Pspark-1.6 +-Pspark-1.5 +-Pspark-1.4 +``` + +minor version can be adjusted by `-Dspark.version=x.x.x` + + +##### `-Phadoop-[version]` + +set hadoop major version + +Available profiles are + +``` +-Phadoop-0.23 +-Phadoop-1 +-Phadoop-2.2 +-Phadoop-2.3 +-Phadoop-2.4 +-Phadoop-2.6 +``` + +minor version can be adjusted by `-Dhadoop.version=x.x.x` + +##### `-Pscala-[version] (optional)` + +set scala version (default 2.10) +Available profiles are + +``` +-Pscala-2.10 +-Pscala-2.11 +``` + +##### `-Pr` (optional) + +enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration. + +##### `-Psparkr` (optional) + +another [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration as well as local mode support. + +##### `-Pvendor-repo` (optional) + +enable 3rd party vendor repository (cloudera) + + +### Ignite Interpreter + +```sh +mvn clean package -Dignite.version=1.7.0 -DskipTests +``` + +### Scalding Interpreter + +```sh +mvn clean package -Pscalding -DskipTests +``` +### -Pexamples (optional) + +Bulid examples under zeppelin-examples directory + + +### Build command examples + + +Here're some examples: + +```sh +# build with spark-2.0, scala-2.11 +./dev/change_scala_version.sh 2.11 +mvn clean package -Pspark-2.0 -Phadoop-2.4 -Psparkr -Pscala-2.11 + +# build with spark-1.6, scala-2.10 +mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr + +# with CDH +mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests + +# with zeppelin-examples directory +mvn clean package -Pexamples -DskipTests + +``` + +## Run + +```sh +./bin/zeppelin-daemon.sh start +``` +And browse localhost:8080 in your browser. + +## Get the latest version of local Spark +You can download the latest version of [Apache Spark](http://spark.apache.org/) and use it without any configuration for Spark interpreter. + +```sh +./bin/zeppelin-daemon.sh get-spark +``` + +or + +``` +./bin/zeppelin.sh get-spark +``` + +Then the Spark will be saved under `ZEPPELIN_HOME/local-spark`. + +## Configure +If you wish to configure Zeppelin option (like port number), configure the following files: + +``` +./conf/zeppelin-env.sh +./conf/zeppelin-site.xml +``` + +(You can copy `./conf/zeppelin-env.sh.template` into `./conf/zeppelin-env.sh`. +Same for `zeppelin-site.xml`.) + +For configuration details check __`./conf`__ subdirectory. + +### Setting SPARK_HOME and HADOOP_HOME + +There are two Spark modes to use Spark interpreter in Zeppelin. +One is using **[local-spark](#get-the-latest-version-of-local-spark)**. +If you want to use this local Spark, then you don't need to set `SPARK_HOME` or `HADOOP_HOME` by yourself. + +Or maybe you need to use other external Spark and Hadoop for your own usage. +Then you should set `SPARK_HOME` and `HADOOP_HOME` in `zeppelin-env.sh`. +You can use any supported version of Spark without rebuilding Zeppelin. + +```sh +# ./conf/zeppelin-env.sh +export SPARK_HOME=... +export HADOOP_HOME=... +``` + +### External cluster configuration + +#### Mesos + +```sh +# ./conf/zeppelin-env.sh +export MASTER=mesos://... 
+export ZEPPELIN_JAVA_OPTS="-Dspark.executor.uri=/path/to/spark-*.tgz" or SPARK_HOME="/path/to/spark_home" +export MESOS_NATIVE_LIBRARY=/path/to/libmesos.so +``` + +If you set `SPARK_HOME`, you should deploy Spark binary on the same location to all worker nodes. And if you set `spark.executor.uri`, every worker can read that file on its node. + +#### Yarn + +```sh +# ./conf/zeppelin-env.sh +export SPARK_HOME=/path/to/spark_dir +``` + +## Building for Scala 2.11 +To produce a Zeppelin package compiled with Scala 2.11, use the `-Pscala-2.11` profile: + +```sh +./dev/change_scala_version.sh 2.11 +mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests clean install +``` + +## Package +To package the final distribution including the compressed archive, run: + +```sh +mvn clean package -Pbuild-distr +``` + +To build a distribution with specific profiles, run: + +```sh +mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 +``` + +The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions. + +The archive is generated under _`zeppelin-distribution/target`_ directory + +## Run end-to-end tests +Zeppelin comes with a set of end-to-end acceptance tests driving headless selenium browser + +```sh +# assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override) +mvn verify + +# or take care of starting/stoping zeppelin-server from packaged zeppelin-distribuion/target +mvn verify -P using-packaged-distr +``` + +[![Analytics](https://ga-beacon.appspot.com/UA-45176241-4/apache/zeppelin/README.md?pixel)](https://github.com/igrigorik/ga-beacon) +>>>>>>> Fix typos diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index 1d837b6889c..6549eb85b6f 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -66,7 +66,7 @@ There are two Spark mode to use Spark interpreter in Zeppelin. One is [local Spa ### Local Spark mode If you are a beginner to Spark and Zeppelin, we would recommend you to download [Local Spark](#local-spark-mode) for using Spark interpreter. -In this case, you can download the latest version of Spark with below command. +In this case, you can get the latest version of Spark with below command. ``` ./bin/zeppelin-daemon.sh get-spark From 3d96bf8b84e3231172d7681229a037434842f6ff Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Fri, 18 Nov 2016 17:19:24 +0100 Subject: [PATCH 38/43] Address @tae-jun's feedback --- conf/zeppelin-env.sh.template | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/zeppelin-env.sh.template b/conf/zeppelin-env.sh.template index a35adba663c..9f3c229f4c1 100644 --- a/conf/zeppelin-env.sh.template +++ b/conf/zeppelin-env.sh.template @@ -41,16 +41,17 @@ #### Spark interpreter configuration #### -## Use provided spark installation ## -## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit +## Use provided Spark installation ## +## defining SPARK_HOME makes Zeppelin run Spark interpreter process using spark-submit ## # export SPARK_HOME # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries # export SPARK_SUBMIT_OPTIONS # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". # export SPARK_APP_NAME # (optional) The name of spark application. -## Use embedded spark binaries ## -## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries. 
-## however, it is not encouraged when you can define SPARK_HOME
+## Use embedded Spark binaries ##
+## You can simply get the embedded Spark binaries by running "ZEPPELIN_HOME/bin/zeppelin-daemon.sh get-spark" or "ZEPPELIN_HOME/bin/zeppelin.sh get-spark".
+## Zeppelin can run the Spark interpreter process using this embedded Spark without any configuration.
+## If you define SPARK_HOME, you don't need to download it.
 ##
 # Options read in YARN client mode
 # export HADOOP_CONF_DIR         # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.

From 80b8b99e640dcfdbfe1057d12469797ed9e9334d Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Fri, 18 Nov 2016 17:20:08 +0100
Subject: [PATCH 39/43] Update newly added build.md page accordingly

---
 README.md             | 285 +-----------------------------------------
 docs/install/build.md |  56 ++-------
 2 files changed, 9 insertions(+), 332 deletions(-)

diff --git a/README.md b/README.md
index 183757405b9..04feda2fb93 100644
--- a/README.md
+++ b/README.md
@@ -21,290 +21,7 @@ To know more about Zeppelin, visit our web site [http://zeppelin.apache.org](htt
 ## Getting Started

 ### Install binary package
-Please refet to [Zeppelin installation guide](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from binary package.
+Please refer to [Zeppelin installation guide](http://zeppelin.apache.org/docs/snapshot/install/install.html) to install Apache Zeppelin from binary package.
-<<<<<<< 380fd31b57436f6526880a6bd74e29b8a1531adf
 ### Build from source
 Please check [How to build Zeppelin from source](http://zeppelin.apache.org/docs/snapshot/install/build.html) to build Zeppelin.
-=======
-#### Install maven
-```
-wget http://www.eu.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
-sudo tar -zxf apache-maven-3.3.9-bin.tar.gz -C /usr/local/
-sudo ln -s /usr/local/apache-maven-3.3.9/bin/mvn /usr/local/bin/mvn
-```
-
-_Notes:_
- - Ensure node is installed by running `node --version`
- - Ensure maven is running version 3.1.x or higher with `mvn -version`
- - Configure maven to use more memory than usual by `export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=1024m"`
-
-#### Proxy settings (optional)
-First of all, set your proxy configuration on Maven `settings.xml`.
-```
-<settings>
-  <proxies>
-    <proxy>
-      <id>proxy-http</id>
-      <active>true</active>
-      <protocol>http</protocol>
-      <host>localhost</host>
-      <port>3128</port>
-      <!-- <username>usr</username> <password>pwd</password> -->
-      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
-    </proxy>
-    <proxy>
-      <id>proxy-https</id>
-      <active>true</active>
-      <protocol>https</protocol>
-      <host>localhost</host>
-      <port>3128</port>
-      <!-- <username>usr</username> <password>pwd</password> -->
-      <nonProxyHosts>localhost|127.0.0.1</nonProxyHosts>
-    </proxy>
-  </proxies>
-</settings>
-```
-
-Then, run these commands from shell.
-```
-npm config set proxy http://localhost:3128
-npm config set https-proxy http://localhost:3128
-npm config set registry "http://registry.npmjs.org/"
-npm config set strict-ssl false
-git config --global http.proxy http://localhost:3128
-git config --global https.proxy http://localhost:3128
-git config --global url."http://".insteadOf git://
-```
-
-Cleanup: set `active false` in Maven `settings.xml` and run these commands.
-```
-npm config rm proxy
-npm config rm https-proxy
-git config --global --unset http.proxy
-git config --global --unset https.proxy
-git config --global --unset url."http://".insteadOf
-```
-
-_Notes:_
- - If you are behind NTLM proxy you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/).
 - - Replace `localhost:3128` with the standard pattern `http://user:pwd@host:port`.
-
-## Build
-If you want to build Zeppelin from the source, please first clone this repository, then:
-
-```
-mvn clean package -DskipTests [Options]
-```
-
-Each Interpreter requires different Options.
- - -### Spark Interpreter - -To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options: - -##### `-Pspark-[version]` - -Set spark major version - -Available profiles are - -``` --Pspark-2.0 --Pspark-1.6 --Pspark-1.5 --Pspark-1.4 -``` - -minor version can be adjusted by `-Dspark.version=x.x.x` - - -##### `-Phadoop-[version]` - -set hadoop major version - -Available profiles are - -``` --Phadoop-0.23 --Phadoop-1 --Phadoop-2.2 --Phadoop-2.3 --Phadoop-2.4 --Phadoop-2.6 -``` - -minor version can be adjusted by `-Dhadoop.version=x.x.x` - -##### `-Pscala-[version] (optional)` - -set scala version (default 2.10) -Available profiles are - -``` --Pscala-2.10 --Pscala-2.11 -``` - -##### `-Pr` (optional) - -enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration. - -##### `-Psparkr` (optional) - -another [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration as well as local mode support. - -##### `-Pvendor-repo` (optional) - -enable 3rd party vendor repository (cloudera) - - -### Ignite Interpreter - -```sh -mvn clean package -Dignite.version=1.7.0 -DskipTests -``` - -### Scalding Interpreter - -```sh -mvn clean package -Pscalding -DskipTests -``` -### -Pexamples (optional) - -Bulid examples under zeppelin-examples directory - - -### Build command examples - - -Here're some examples: - -```sh -# build with spark-2.0, scala-2.11 -./dev/change_scala_version.sh 2.11 -mvn clean package -Pspark-2.0 -Phadoop-2.4 -Psparkr -Pscala-2.11 - -# build with spark-1.6, scala-2.10 -mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr - -# with CDH -mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests - -# with zeppelin-examples directory -mvn clean package -Pexamples -DskipTests - -``` - -## Run - -```sh -./bin/zeppelin-daemon.sh start -``` -And browse localhost:8080 in your browser. - -## Get the latest version of local Spark -You can download the latest version of [Apache Spark](http://spark.apache.org/) and use it without any configuration for Spark interpreter. - -```sh -./bin/zeppelin-daemon.sh get-spark -``` - -or - -``` -./bin/zeppelin.sh get-spark -``` - -Then the Spark will be saved under `ZEPPELIN_HOME/local-spark`. - -## Configure -If you wish to configure Zeppelin option (like port number), configure the following files: - -``` -./conf/zeppelin-env.sh -./conf/zeppelin-site.xml -``` - -(You can copy `./conf/zeppelin-env.sh.template` into `./conf/zeppelin-env.sh`. -Same for `zeppelin-site.xml`.) - -For configuration details check __`./conf`__ subdirectory. - -### Setting SPARK_HOME and HADOOP_HOME - -There are two Spark modes to use Spark interpreter in Zeppelin. -One is using **[local-spark](#get-the-latest-version-of-local-spark)**. -If you want to use this local Spark, then you don't need to set `SPARK_HOME` or `HADOOP_HOME` by yourself. - -Or maybe you need to use other external Spark and Hadoop for your own usage. -Then you should set `SPARK_HOME` and `HADOOP_HOME` in `zeppelin-env.sh`. -You can use any supported version of Spark without rebuilding Zeppelin. - -```sh -# ./conf/zeppelin-env.sh -export SPARK_HOME=... -export HADOOP_HOME=... -``` - -### External cluster configuration - -#### Mesos - -```sh -# ./conf/zeppelin-env.sh -export MASTER=mesos://... 
-export ZEPPELIN_JAVA_OPTS="-Dspark.executor.uri=/path/to/spark-*.tgz" or SPARK_HOME="/path/to/spark_home" -export MESOS_NATIVE_LIBRARY=/path/to/libmesos.so -``` - -If you set `SPARK_HOME`, you should deploy Spark binary on the same location to all worker nodes. And if you set `spark.executor.uri`, every worker can read that file on its node. - -#### Yarn - -```sh -# ./conf/zeppelin-env.sh -export SPARK_HOME=/path/to/spark_dir -``` - -## Building for Scala 2.11 -To produce a Zeppelin package compiled with Scala 2.11, use the `-Pscala-2.11` profile: - -```sh -./dev/change_scala_version.sh 2.11 -mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests clean install -``` - -## Package -To package the final distribution including the compressed archive, run: - -```sh -mvn clean package -Pbuild-distr -``` - -To build a distribution with specific profiles, run: - -```sh -mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 -``` - -The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions. - -The archive is generated under _`zeppelin-distribution/target`_ directory - -## Run end-to-end tests -Zeppelin comes with a set of end-to-end acceptance tests driving headless selenium browser - -```sh -# assumes zeppelin-server running on localhost:8080 (use -Durl=.. to override) -mvn verify - -# or take care of starting/stoping zeppelin-server from packaged zeppelin-distribuion/target -mvn verify -P using-packaged-distr -``` - -[![Analytics](https://ga-beacon.appspot.com/UA-45176241-4/apache/zeppelin/README.md?pixel)](https://github.com/igrigorik/ga-beacon) ->>>>>>> Fix typos diff --git a/docs/install/build.md b/docs/install/build.md index 7d89a98a393..1a71d7efeb8 100644 --- a/docs/install/build.md +++ b/docs/install/build.md @@ -69,7 +69,7 @@ If you're unsure about the options, use the same commands that creates official # update all pom.xml to use scala 2.11 ./dev/change_scala_version.sh 2.11 # build zeppelin with all interpreters and include latest version of Apache spark support for local mode. -mvn clean package -DskipTests -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pr -Pscala-2.11 +mvn clean package -DskipTests -Pspark-2.0 -Phadoop-2.4 -Psparkr -Pr -Pscala-2.11 ``` ####3. Done @@ -86,8 +86,6 @@ If you're interested in contribution, please check [Contributing to Apache Zeppe ### Build profiles -#### Spark Interpreter - To build with a specific Spark version, Hadoop version or specific features, define one or more of the following profiles and options: ##### `-Pspark-[version]` @@ -101,11 +99,6 @@ Available profiles are -Pspark-1.6 -Pspark-1.5 -Pspark-1.4 --Pcassandra-spark-1.5 --Pcassandra-spark-1.4 --Pcassandra-spark-1.3 --Pcassandra-spark-1.2 --Pcassandra-spark-1.1 ``` minor version can be adjusted by `-Dspark.version=x.x.x` @@ -138,15 +131,6 @@ Available profiles are -Pscala-2.11 ``` -##### `-Pyarn` (optional) - -enable YARN support for local mode -> YARN for local mode is not supported for Spark v1.5.0 or higher. Set `SPARK_HOME` instead. - -##### `-Ppyspark` (optional) - -enable [PySpark](http://spark.apache.org/docs/latest/api/python/) support for local mode. - ##### `-Pr` (optional) enable [R](https://www.r-project.org/) support with [SparkR](https://spark.apache.org/docs/latest/sparkr.html) integration. 
@@ -160,24 +144,9 @@ another [R](https://www.r-project.org/) support with [SparkR](https://spark.apac enable 3rd party vendor repository (cloudera) -##### `-Pmapr[version]` (optional) +##### `-Pexamples` (optional) -For the MapR Hadoop Distribution, these profiles will handle the Hadoop version. As MapR allows different versions of Spark to be installed, you should specify which version of Spark is installed on the cluster by adding a Spark profile (`-Pspark-1.6`, `-Pspark-2.0`, etc.) as needed. -The correct Maven artifacts can be found for every version of MapR at http://doc.mapr.com - -Available profiles are - -``` --Pmapr3 --Pmapr40 --Pmapr41 --Pmapr50 --Pmapr51 -``` - -#### -Pexamples (optional) - -Bulid examples under zeppelin-examples directory +Bulid examples under `zeppelin-examples` directory ### Build command examples @@ -186,25 +155,19 @@ Here are some examples with several options: ```bash # build with spark-2.0, scala-2.11 ./dev/change_scala_version.sh 2.11 -mvn clean package -Pspark-2.0 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -Pscala-2.11 -DskipTests +mvn clean package -Pspark-2.0 -Phadoop-2.4 -Psparkr -Pscala-2.11 -DskipTests # build with spark-1.6, scala-2.10 -mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Psparkr -DskipTests - -# spark-cassandra integration -mvn clean package -Pcassandra-spark-1.5 -Dhadoop.version=2.6.0 -Phadoop-2.6 -DskipTests -DskipTests +mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr -DskipTests # with CDH mvn clean package -Pspark-1.5 -Dhadoop.version=2.6.0-cdh5.5.0 -Phadoop-2.6 -Pvendor-repo -DskipTests - -# with MapR -mvn clean package -Pspark-1.5 -Pmapr50 -DskipTests ``` Ignite Interpreter ```bash -mvn clean package -Dignite.version=1.6.0 -DskipTests +mvn clean package -Dignite.version=1.7.0 -DskipTests ``` Scalding Interpreter @@ -213,9 +176,6 @@ Scalding Interpreter mvn clean package -Pscalding -DskipTests ``` - - - ## Build requirements ### Install requirements @@ -322,10 +282,10 @@ mvn clean package -Pbuild-distr To build a distribution with specific profiles, run: ```sh -mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark +mvn clean package -Pbuild-distr -Pspark-1.5 -Phadoop-2.4 ``` -The profiles `-Pspark-1.5 -Phadoop-2.4 -Pyarn -Ppyspark` can be adjusted if you wish to build to a specific spark versions, or omit support such as `yarn`. +The profiles `-Pspark-1.5 -Phadoop-2.4` can be adjusted if you wish to build to a specific spark versions. The archive is generated under _`zeppelin-distribution/target`_ directory From 2747d9eec49aa04f92ac93408f4c00cb101cb23e Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Fri, 18 Nov 2016 17:57:57 +0100 Subject: [PATCH 40/43] Print notice msg when Zeppelin server start --- bin/zeppelin-daemon.sh | 4 ++++ bin/zeppelin.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh index c3e660da07c..ab3e2397095 100755 --- a/bin/zeppelin-daemon.sh +++ b/bin/zeppelin-daemon.sh @@ -179,6 +179,10 @@ function start() { initialize_default_directories + if [[ ! 
-d "${SPARK_CACHE}/${SPARK_ARCHIVE}" && -z "${SPARK_HOME}" ]]; then + echo -e "\nYou do not have neither local-spark, nor external SPARK_HOME set up.\nIf you want to use Spark interpreter, you need to run get-spark at least one time or set SPARK_HOME.\n" + fi + echo "ZEPPELIN_CLASSPATH: ${ZEPPELIN_CLASSPATH_OVERRIDES}:${CLASSPATH}" >> "${ZEPPELIN_OUTFILE}" nohup nice -n $ZEPPELIN_NICENESS $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_MAIN >> "${ZEPPELIN_OUTFILE}" 2>&1 < /dev/null & diff --git a/bin/zeppelin.sh b/bin/zeppelin.sh index c59def79214..dfafac988e5 100755 --- a/bin/zeppelin.sh +++ b/bin/zeppelin.sh @@ -91,4 +91,8 @@ if [[ ! -d "${ZEPPELIN_NOTEBOOK_DIR}" ]]; then $(mkdir -p "${ZEPPELIN_NOTEBOOK_DIR}") fi +if [[ ! -d "${SPARK_CACHE}/${SPARK_ARCHIVE}" && -z "${SPARK_HOME}" ]]; then + echo -e "\nYou do not have neither local-spark, nor external SPARK_HOME set up.\nIf you want to use Spark interpreter, you need to run get-spark at least one time or set SPARK_HOME.\n" +fi + exec $ZEPPELIN_RUNNER $JAVA_OPTS -cp $ZEPPELIN_CLASSPATH_OVERRIDES:$CLASSPATH $ZEPPELIN_SERVER "$@" From eb6fa6a5e162fcdf2488f158bd70674087aabe72 Mon Sep 17 00:00:00 2001 From: AhyoungRyu Date: Sun, 20 Nov 2016 09:10:51 +0100 Subject: [PATCH 41/43] Update vagrant/ related files accordinly --- scripts/vagrant/zeppelin-dev/README.md | 8 +++++++- scripts/vagrant/zeppelin-dev/show-instructions.sh | 11 +++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/vagrant/zeppelin-dev/README.md b/scripts/vagrant/zeppelin-dev/README.md index 4d0e6c7c1d3..b9671f4940f 100644 --- a/scripts/vagrant/zeppelin-dev/README.md +++ b/scripts/vagrant/zeppelin-dev/README.md @@ -83,11 +83,17 @@ The virtual machine consists of: ### How to build & run Zeppelin -This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. The following build steps will also include Python and R support via PySpark and SparkR: +This assumes you've already cloned the project either on the host machine in the zeppelin-dev directory (to be shared with the guest machine) or cloned directly into a directory while running inside the guest machine. +The following build steps will also include R support via SparkR(`-Psparkr`): ``` cd /zeppelin mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr -DskipTests +``` + +You can download a latest version of local Spark with `get-spark` option. +``` +./bin/zeppelin-daemon.sh get-spark ./bin/zeppelin-daemon.sh start ``` diff --git a/scripts/vagrant/zeppelin-dev/show-instructions.sh b/scripts/vagrant/zeppelin-dev/show-instructions.sh index f3b2b27aeb9..159c1801903 100644 --- a/scripts/vagrant/zeppelin-dev/show-instructions.sh +++ b/scripts/vagrant/zeppelin-dev/show-instructions.sh @@ -32,10 +32,13 @@ echo echo 'cd /vagrant/zeppelin' echo 'mvn clean package -DskipTests' echo -echo '# or for a specific Spark/Hadoop build with additional options such as python and R support' -echo -echo 'mvn clean package -Pspark-1.6 -Ppyspark -Phadoop-2.4 -Psparkr -DskipTests' +echo '# or for a specific Spark/Hadoop build with additional options' +echo '# e.g. 
-Psparkr enables SparkR support in Zeppelin'
+echo 'mvn clean package -Pspark-1.6 -Phadoop-2.4 -Psparkr -DskipTests'
+echo
+echo '# download local Spark with get-spark option before Zeppelin start'
+echo './bin/zeppelin-daemon.sh get-spark'
 echo './bin/zeppelin-daemon.sh start'
 echo
-echo 'On your host machine browse to http://localhost:8080/'
+echo '# On your host machine browse http://localhost:8080/'

From 2c1fe1510f4ff1a2c64f98151bfd8b8d9232fc98 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Thu, 24 Nov 2016 14:27:22 +0900
Subject: [PATCH 42/43] Address @bzz feedback: update migration notice \w stronger msg

---
 docs/install/upgrade.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/install/upgrade.md b/docs/install/upgrade.md
index 5d3c3305f4e..2339b212123 100644
--- a/docs/install/upgrade.md
+++ b/docs/install/upgrade.md
@@ -48,8 +48,8 @@ So, copying `notebook` and `conf` directory should be enough.

 ## Migration Guide

 ### Upgrading from Zeppelin 0.6 to 0.7
+ - Apache Zeppelin releases do not come with Apache Spark built-in by default anymore. In order to be able to run [Apache Spark](http://spark.apache.org/) paragraphs, please either run `./bin/zeppelin-daemon.sh get-spark` or point `$SPARK_HOME` to an Apache Spark installation. See [local Spark mode](../interpreter/spark.html#local-spark-mode) for more details.
 - From 0.7, the support on Spark 1.1.x to 1.3.x is deprecated.
- - Zeppelin embedded Spark won't work anymore. You need to run `./bin/zeppelin-daemon.sh get-spark` or `./bin/zeppelin.sh get-spark` at least one time. Please see [local Spark mode](../interpreter/spark.html#local-spark-mode) for more detailed information.
 - We don't use `ZEPPELIN_JAVA_OPTS` as default value of `ZEPPELIN_INTP_JAVA_OPTS` and also the same for `ZEPPELIN_MEM`/`ZEPPELIN_INTP_MEM`. If user want to configure the jvm opts of interpreter process, please set `ZEPPELIN_INTP_JAVA_OPTS` and `ZEPPELIN_INTP_MEM` explicitly. If you don't set `ZEPPELIN_INTP_MEM`, Zeppelin will set it to `-Xms1024m -Xmx1024m -XX:MaxPermSize=512m` by default.
 - Mapping from `%jdbc(prefix)` to `%prefix` is no longer available. Instead, you can use %[interpreter alias] with multiple interpreter setttings on GUI.
 - Usage of `ZEPPELIN_PORT` is not supported in ssl mode. Instead use `ZEPPELIN_SSL_PORT` to configure the ssl port. Value from `ZEPPELIN_PORT` is used only when `ZEPPELIN_SSL` is set to `false`.

From a651f48f1f5bac33395746938283586d9fc23324 Mon Sep 17 00:00:00 2001
From: AhyoungRyu
Date: Thu, 24 Nov 2016 19:41:29 +0900
Subject: [PATCH 43/43] Remove unused variables in download-spark.sh

---
 bin/download-spark.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/bin/download-spark.sh b/bin/download-spark.sh
index 39786d510d6..a0bdd171d51 100644
--- a/bin/download-spark.sh
+++ b/bin/download-spark.sh
@@ -21,12 +21,6 @@
 bin=$(cd "${bin}">/dev/null; pwd)

 . "${bin}/common.sh"

-ZEPPELIN_ENV="conf/zeppelin-env.sh"
-ZEPPELIN_ENV_TEMP="${ZEPPELIN_ENV}.template"
-ZEPPELIN_VERSION="$(getZeppelinVersion)"
-
-ANSWER_FILE="README.txt"
-
 # Download Spark binary package from the given URL.
 # Tries 3 times with 1s delay
 # Arguments: url - source URL
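With this final cleanup, the download helper that the trailing comments describe is a thin wrapper around curl's built-in retry handling. Reconstructed from the hunks above, the function's end state is roughly:

```bash
# Download a file from the given URL into the current directory.
# curl itself retries 3 times with a 1 second delay between attempts.
function download_with_retry() {
  local url="$1"
  curl -O --retry 3 --retry-delay 1 "${url}"

  if [[ "$?" -ne 0 ]]; then
    echo -e "\nStop downloading with unexpected error."
  fi
}
```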