Skip to content

Commit

Permalink
[ZEPPELIN-18] Running pyspark without deploying python libraries to e…
Browse files Browse the repository at this point in the history
  • Loading branch information
jongyoul committed Jul 4, 2015
1 parent 1b192f6 commit 0ddb436
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 25 deletions.
10 changes: 10 additions & 0 deletions bin/interpreter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ if [[ ! -d "${ZEPPELIN_LOG_DIR}" ]]; then
$(mkdir -p "${ZEPPELIN_LOG_DIR}")
fi

# Make Zeppelin's bundled pyspark and py4j archives visible to Python so the
# pyspark interpreter runs without deploying Spark's python libs to the node.
if [[ -z "${PYTHONPATH}" ]]; then
  export PYTHONPATH="${ZEPPELIN_HOME}/python/lib/pyspark.zip:${ZEPPELIN_HOME}/python/lib/py4j-0.8.2.1-src.zip"
else
  # BUGFIX: the original appended with no ':' separator after $PYTHONPATH and
  # used the wrong directory ("lib/pyspark.zip" instead of
  # "python/lib/pyspark.zip"), corrupting any pre-existing PYTHONPATH.
  export PYTHONPATH="${PYTHONPATH}:${ZEPPELIN_HOME}/python/lib/pyspark.zip:${ZEPPELIN_HOME}/python/lib/py4j-0.8.2.1-src.zip"
fi

# Default SPARK_HOME to the Zeppelin installation so the embedded Spark is
# used when the user has not configured an external Spark distribution.
if [[ -z "${SPARK_HOME}" ]]; then
  export SPARK_HOME="${ZEPPELIN_HOME}"
fi


${ZEPPELIN_RUNNER} ${JAVA_INTP_OPTS} -cp ${CLASSPATH} ${ZEPPELIN_SERVER} ${PORT} &
pid=$!
Expand Down
33 changes: 25 additions & 8 deletions spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -915,7 +915,7 @@
<executions>
<execution>
<id>download-pyspark-files</id>
<phase>prepare-package</phase>
<phase>validate</phase>
<goals>
<goal>wget</goal>
</goals>
Expand All @@ -927,25 +927,42 @@
</execution>
</executions>
</plugin>
<plugin>
  <artifactId>maven-clean-plugin</artifactId>
  <configuration>
    <filesets>
      <!-- Remove the pyspark/py4j files unpacked into the top-level python dir -->
      <fileset>
        <directory>${basedir}/../python/build</directory>
      </fileset>
      <!-- BUGFIX: property name was misspelled "direcoty"; Maven left the
           literal unresolved string in the path, so the spark-dist download
           directory was never actually cleaned. -->
      <fileset>
        <directory>${project.build.directory}/spark-dist</directory>
      </fileset>
    </filesets>
    <verbose>true</verbose>
  </configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>download-and-zip-pyspark-files</id>
<phase>package</phase>
<phase>generate-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<delete dir="${project.build.directory}/../../python" />
<zip destfile="${project.build.directory}/../../python/pyspark.zip"
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python/pyspark"/>
<copy
file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-0.8.2.1-src.zip"
todir="${project.build.directory}/../../python"/>
<delete dir="../python" />
<copy todir="../python">
<fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
</copy>
<unzip src="../python/lib/py4j-0.8.2.1-src.zip"
dest="../python/build"/>
<zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
includes="pyspark/*.py,pyspark/**/*.py"/>
</target>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,18 +159,6 @@ public void open() {
try {
Map env = EnvironmentUtils.getProcEnvironment();

String pythonPath = (String) env.get("PYTHONPATH");
if (pythonPath == null) {
pythonPath = "";
} else {
pythonPath += ":";
}

pythonPath += getSparkHome() + "/python/lib/py4j-0.8.2.1-src.zip:"
+ getSparkHome() + "/python";

env.put("PYTHONPATH", pythonPath);

executor.execute(cmd, env, this);
pythonscriptRunning = true;
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,9 @@
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.*;

import com.google.common.base.Joiner;
import org.apache.spark.HttpServer;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
Expand Down Expand Up @@ -272,6 +269,18 @@ public SparkContext createSparkContext() {
conf.set(key, val);
}
}

//TODO(jongyoul): Move these codes into PySparkInterpreter.java
String zeppelinHome = getSystemDefault("ZEPPELIN_HOME", "zeppelin.home", "../");
File zeppelinPythonLibPath = new File(zeppelinHome, "python/lib");
String[] pythonLibs = new String[] {"pyspark.zip", "py4j-0.8.2.1-src.zip"};
ArrayList<String> pythonLibUris = new ArrayList<>();
for (String lib: pythonLibs) {
pythonLibUris.add(new File(zeppelinPythonLibPath, lib).toURI().toString());
}
conf.set("spark.yarn.dist.files", Joiner.on(",").join(pythonLibUris));
conf.set("spark.files", conf.get("spark.yarn.dist.files"));
conf.set("spark.submit.pyArchives", Joiner.on(":").join(pythonLibs));

SparkContext sparkContext = new SparkContext(conf);
return sparkContext;
Expand Down

0 comments on commit 0ddb436

Please sign in to comment.