diff --git a/.gitignore b/.gitignore
index 1358a425246..75d67370774 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,3 +77,4 @@ auto-save-list
 tramp
 .\#*
 *.swp
+**/dependency-reduced-pom.xml
diff --git a/.travis.yml b/.travis.yml
index 4031e781edd..ac80e8a6959 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,16 +22,16 @@ before_install:
   - "sh -e /etc/init.d/xvfb start"
 
 install:
-  - mvn package -DskipTests -Phadoop-2.3 -B
+  - mvn package -DskipTests -Phadoop-2.3 -Ppyspark -B
 
 before_script:
   -
 
 script:
 # spark 1.4
-  - mvn package -Pbuild-distr -Phadoop-2.3 -B
+  - mvn package -Pbuild-distr -Phadoop-2.3 -Ppyspark -B
   - ./testing/startSparkCluster.sh 1.4.0 2.3
-  - SPARK_HOME=./spark-1.4.1-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -B
+  - SPARK_HOME=`pwd`/spark-1.4.0-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -Ppyspark -B
   - ./testing/stopSparkCluster.sh 1.4.0 2.3
 # spark 1.3
   - mvn clean package -DskipTests -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index c214c3081d0..62bc514ef74 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -73,6 +73,19 @@ if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
   $(mkdir -p "${ZEPPELIN_LOG_DIR}")
 fi
 
+if [[ ! -z "${SPARK_HOME}" ]]; then
+  PYSPARKPATH="${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-0.8.2.1-src.zip"
+else
+  PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
+fi
+
+if [[ x"" == x"${PYTHONPATH}" ]]; then
+  export PYTHONPATH="${PYSPARKPATH}"
+else
+  export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
+fi
+
+unset PYSPARKPATH
+
 ${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} -cp ${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
 pid=$!
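Reviewer note: the interpreter.sh hunk above picks the PySpark archives from ${SPARK_HOME}/python/lib when SPARK_HOME is set, otherwise from the copies the new pyspark Maven profile bundles under the Zeppelin installation, and appends the result to any existing PYTHONPATH. A minimal Java sketch of that same resolution logic, for comparison with the SparkInterpreter change later in this diff (the class and method names here are illustrative, not part of the patch):

// Hypothetical sketch, not part of the patch: mirrors the shell logic above.
public class PySparkPathSketch {
  static String resolvePythonPath(String sparkHome, String zeppelinHome, String currentPythonPath) {
    // Prefer the archives shipped with a real Spark distribution; otherwise fall
    // back to the copies the pyspark Maven profile bundles under Zeppelin.
    String libDir = (sparkHome != null && !sparkHome.isEmpty())
        ? sparkHome + "/python/lib"
        : zeppelinHome + "/interpreter/spark/pyspark";
    String pysparkPath = libDir + "/pyspark.zip:" + libDir + "/py4j-0.8.2.1-src.zip";
    // Append rather than overwrite, mirroring the x"" == x"${PYTHONPATH}" test.
    return (currentPythonPath == null || currentPythonPath.isEmpty())
        ? pysparkPath
        : currentPythonPath + ":" + pysparkPath;
  }

  public static void main(String[] args) {
    System.out.println(resolvePythonPath(System.getenv("SPARK_HOME"),
        System.getenv("ZEPPELIN_HOME"), System.getenv("PYTHONPATH")));
  }
}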
diff --git a/spark/pom.xml b/spark/pom.xml
index 782670e256f..b9327634818 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -48,6 +48,8 @@
     <akka.group>org.spark-project.akka</akka.group>
     <akka.version>2.3.4-spark</akka.version>
+
+    <spark.download.url>http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
   </properties>
@@ -473,13 +475,6 @@
     </dependency>
 
-    <dependency>
-      <groupId>net.sf.py4j</groupId>
-      <artifactId>py4j</artifactId>
-      <version>0.8.2.1</version>
-    </dependency>
-
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-exec</artifactId>
@@ -731,6 +726,74 @@
     </profile>
 
+    <profile>
+      <id>pyspark</id>
+      <properties>
+        <spark.download.url>http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>com.googlecode.maven-download-plugin</groupId>
+            <artifactId>download-maven-plugin</artifactId>
+            <version>1.2.1</version>
+            <executions>
+              <execution>
+                <id>download-pyspark-files</id>
+                <phase>validate</phase>
+                <goals>
+                  <goal>wget</goal>
+                </goals>
+                <configuration>
+                  <url>${spark.download.url}</url>
+                  <unpack>true</unpack>
+                  <outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <artifactId>maven-clean-plugin</artifactId>
+            <configuration>
+              <filesets>
+                <fileset>
+                  <directory>${basedir}/../python/build</directory>
+                </fileset>
+                <fileset>
+                  <directory>${project.build.directory}/spark-dist</directory>
+                </fileset>
+              </filesets>
+            </configuration>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
+            <version>1.7</version>
+            <executions>
+              <execution>
+                <id>download-and-zip-pyspark-files</id>
+                <phase>generate-resources</phase>
+                <goals>
+                  <goal>run</goal>
+                </goals>
+                <configuration>
+                  <target>
+                    <!-- Ant task bodies were lost in extraction; reconstructed from the
+                         archives the rest of the patch expects under interpreter/spark/pyspark -->
+                    <delete dir="../interpreter/spark/pyspark"/>
+                    <copy todir="../interpreter/spark/pyspark"
+                          file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-0.8.2.1-src.zip"/>
+                    <zip destfile="../interpreter/spark/pyspark/pyspark.zip"
+                         basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
+                         includes="pyspark/*.py,pyspark/**/*.py"/>
+                  </target>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+
     <profile>
       <id>hadoop-provided</id>
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
index 092b077359c..852dd335183 100644
--- a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
@@ -159,18 +159,6 @@ public void open() {
     try {
       Map env = EnvironmentUtils.getProcEnvironment();
 
-      String pythonPath = (String) env.get("PYTHONPATH");
-      if (pythonPath == null) {
-        pythonPath = "";
-      } else {
-        pythonPath += ":";
-      }
-
-      pythonPath += getSparkHome() + "/python/lib/py4j-0.8.2.1-src.zip:"
-          + getSparkHome() + "/python";
-
-      env.put("PYTHONPATH", pythonPath);
-
       executor.execute(cmd, env, this);
       pythonscriptRunning = true;
     } catch (IOException e) {
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
index ab3609ab422..aec6d16d55a 100644
--- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
@@ -26,12 +26,9 @@
 import java.lang.reflect.Method;
 import java.net.URL;
 import java.net.URLClassLoader;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
+import java.util.*;
 
+import com.google.common.base.Joiner;
 import org.apache.spark.HttpServer;
 import org.apache.spark.SparkConf;
 import org.apache.spark.SparkContext;
@@ -273,6 +270,34 @@ public SparkContext createSparkContext() {
       }
     }
 
+    //TODO(jongyoul): Move these codes into PySparkInterpreter.java
+
+    String pysparkBasePath = getSystemDefault("SPARK_HOME", "spark.home", null);
+    File pysparkPath;
+    if (null == pysparkBasePath) {
+      pysparkBasePath = getSystemDefault("ZEPPELIN_HOME", "zeppelin.home", "../");
+      pysparkPath = new File(pysparkBasePath,
+          "interpreter" + File.separator + "spark" + File.separator + "pyspark");
+    } else {
+      pysparkPath = new File(pysparkBasePath,
+          "python" + File.separator + "lib");
+    }
+
+    String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"};
+    ArrayList<String> pythonLibUris = new ArrayList<>();
+    for (String lib : pythonLibs) {
+      File libFile = new File(pysparkPath, lib);
+      if (libFile.exists()) {
+        pythonLibUris.add(libFile.toURI().toString());
+      }
+    }
+    pythonLibUris.trimToSize();
+    if (pythonLibs.length == pythonLibUris.size()) {
+      conf.set("spark.yarn.dist.files", Joiner.on(",").join(pythonLibUris));
+      conf.set("spark.files", conf.get("spark.yarn.dist.files"));
+      conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs));
+    }
+
     SparkContext sparkContext = new SparkContext(conf);
     return sparkContext;
   }