diff --git a/.gitignore b/.gitignore
index 1358a425246..75d67370774 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,3 +77,4 @@ auto-save-list
tramp
.\#*
*.swp
+**/dependency-reduced-pom.xml
diff --git a/.travis.yml b/.travis.yml
index 4031e781edd..ac80e8a6959 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,16 +22,16 @@ before_install:
- "sh -e /etc/init.d/xvfb start"
install:
- - mvn package -DskipTests -Phadoop-2.3 -B
+ - mvn package -DskipTests -Phadoop-2.3 -Ppyspark -B
before_script:
-
script:
# spark 1.4
- - mvn package -Pbuild-distr -Phadoop-2.3 -B
+ - mvn package -Pbuild-distr -Phadoop-2.3 -Ppyspark -B
- ./testing/startSparkCluster.sh 1.4.0 2.3
- - SPARK_HOME=./spark-1.4.1-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -B
+ - SPARK_HOME=`pwd`/spark-1.4.0-bin-hadoop2.3 mvn verify -Pusing-packaged-distr -Phadoop-2.3 -Ppyspark -B
- ./testing/stopSparkCluster.sh 1.4.0 2.3
# spark 1.3
- mvn clean package -DskipTests -Pspark-1.3 -Phadoop-2.3 -B -pl 'zeppelin-interpreter,spark'
diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index c214c3081d0..62bc514ef74 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -73,6 +73,19 @@ if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
$(mkdir -p "${ZEPPELIN_LOG_DIR}")
fi
+if [[ ! -z "${SPARK_HOME}" ]]; then
+ PYSPARKPATH="${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-0.8.2.1-src.zip"
+else
+ PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
+fi
+
+if [[ x"" == x"${PYTHONPATH}" ]]; then
+ export PYTHONPATH="${PYSPARKPATH}"
+else
+ export PYTHONPATH="${PYTHONPATH}:${PYSPARKPATH}"
+fi
+
+unset PYSPARKPATH
${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} -cp ${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
pid=$!
diff --git a/spark/pom.xml b/spark/pom.xml
index 782670e256f..b9327634818 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -48,6 +48,8 @@
org.spark-project.akka
2.3.4-spark
+
+ http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
@@ -473,13 +475,6 @@
-
-
- net.sf.py4j
- py4j
- 0.8.2.1
-
-
org.apache.commons
commons-exec
@@ -731,6 +726,74 @@
+
+ pyspark
+
+ http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz
+
+
+
+
+
+ com.googlecode.maven-download-plugin
+ download-maven-plugin
+ 1.2.1
+
+
+ download-pyspark-files
+ validate
+
+ wget
+
+
+ ${spark.download.url}
+ true
+ ${project.build.directory}/spark-dist
+
+
+
+
+
+ maven-clean-plugin
+
+
+
+ ${basedir}/../python/build
+
+
+ ${project.build.directory}/spark-dist
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+ 1.7
+
+
+ download-and-zip-pyspark-files
+ generate-resources
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
hadoop-provided
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
index 092b077359c..852dd335183 100644
--- a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java
@@ -159,18 +159,6 @@ public void open() {
try {
Map env = EnvironmentUtils.getProcEnvironment();
- String pythonPath = (String) env.get("PYTHONPATH");
- if (pythonPath == null) {
- pythonPath = "";
- } else {
- pythonPath += ":";
- }
-
- pythonPath += getSparkHome() + "/python/lib/py4j-0.8.2.1-src.zip:"
- + getSparkHome() + "/python";
-
- env.put("PYTHONPATH", pythonPath);
-
executor.execute(cmd, env, this);
pythonscriptRunning = true;
} catch (IOException e) {
diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
index ab3609ab422..aec6d16d55a 100644
--- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
+++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java
@@ -26,12 +26,9 @@
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
+import java.util.*;
+import com.google.common.base.Joiner;
import org.apache.spark.HttpServer;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
@@ -273,6 +270,34 @@ public SparkContext createSparkContext() {
}
}
+ //TODO(jongyoul): Move these codes into PySparkInterpreter.java
+
+ String pysparkBasePath = getSystemDefault("SPARK_HOME", "spark.home", null);
+ File pysparkPath;
+ if (null == pysparkBasePath) {
+ pysparkBasePath = getSystemDefault("ZEPPELIN_HOME", "zeppelin.home", "../");
+ pysparkPath = new File(pysparkBasePath,
+ "interpreter" + File.separator + "spark" + File.separator + "pyspark");
+ } else {
+ pysparkPath = new File(pysparkBasePath,
+ "python" + File.separator + "lib");
+ }
+
+ String[] pythonLibs = new String[]{"pyspark.zip", "py4j-0.8.2.1-src.zip"};
+ ArrayList pythonLibUris = new ArrayList<>();
+ for (String lib : pythonLibs) {
+ File libFile = new File(pysparkPath, lib);
+ if (libFile.exists()) {
+ pythonLibUris.add(libFile.toURI().toString());
+ }
+ }
+ pythonLibUris.trimToSize();
+ if (pythonLibs.length == pythonLibUris.size()) {
+ conf.set("spark.yarn.dist.files", Joiner.on(",").join(pythonLibUris));
+ conf.set("spark.files", conf.get("spark.yarn.dist.files"));
+ conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs));
+ }
+
SparkContext sparkContext = new SparkContext(conf);
return sparkContext;
}