Skip to content

Commit

Permalink
[ZEPPELIN-18] Running pyspark without deploying python libraries to every yarn node
Browse files Browse the repository at this point in the history

- rebasing
  • Loading branch information
jongyoul committed Jul 4, 2015
1 parent 0a2d90e commit 64b8195
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 61 deletions.
132 changes: 71 additions & 61 deletions spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,77 @@
</dependencies>
</profile>

<!--
  Profile "yarn-pyspark": downloads a Spark distribution at build time, copies its
  python directory to ../python, unpacks py4j into ../python/build and packages the
  pyspark sources into ../python/lib/pyspark.zip.
-->
<profile>
  <id>yarn-pyspark</id>
  <properties>
    <!-- Kept on a single line so the property value carries no embedded line break. -->
    <spark.download.url>http://www.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url>
  </properties>
  <build>
    <plugins>
      <!-- Step 1 (validate): fetch the Spark tarball and unpack it under target/spark-dist. -->
      <plugin>
        <groupId>com.googlecode.maven-download-plugin</groupId>
        <artifactId>download-maven-plugin</artifactId>
        <version>1.2.1</version>
        <executions>
          <execution>
            <id>download-pyspark-files</id>
            <phase>validate</phase>
            <goals>
              <goal>wget</goal>
            </goals>
            <configuration>
              <url>${spark.download.url}</url>
              <unpack>true</unpack>
              <outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <!-- Extra directories for "mvn clean": the unpacked py4j sources and the Spark download. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-clean-plugin</artifactId>
        <configuration>
          <filesets>
            <fileset>
              <directory>${basedir}/../python/build</directory>
            </fileset>
            <fileset>
              <!-- Was misspelled "project.build.direcoty", which Maven cannot interpolate. -->
              <directory>${project.build.directory}/spark-dist</directory>
            </fileset>
          </filesets>
        </configuration>
      </plugin>
      <!-- Step 2 (generate-resources): assemble the ../python directory from the download. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-antrun-plugin</artifactId>
        <version>1.7</version>
        <executions>
          <execution>
            <id>download-and-zip-pyspark-files</id>
            <phase>generate-resources</phase>
            <goals>
              <goal>run</goal>
            </goals>
            <configuration>
              <target>
                <!-- Task order matters: wipe, copy, then unzip and zip into the fresh copy. -->
                <delete dir="../python"/>
                <copy todir="../python">
                  <fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
                </copy>
                <!-- NOTE(review): the py4j version is hard-coded; confirm it matches the archive
                     shipped with the selected spark.version. -->
                <unzip src="../python/lib/py4j-0.8.2.1-src.zip"
                       dest="../python/build"/>
                <!-- NOTE(review): with the default build directory this path is the same
                     ../python/lib directory populated by the copy above. -->
                <zip destfile="${project.build.directory}/../../python/lib/pyspark.zip"
                     basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
                     includes="pyspark/*.py,pyspark/**/*.py"/>
              </target>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>

<!-- Build without Hadoop dependencies that are included in some runtime environments. -->
<profile>
<id>hadoop-provided</id>
Expand Down Expand Up @@ -907,67 +978,6 @@
</executions>
</plugin>

<!-- pyspark: during "validate", fetch the archive at spark.download.url and unpack it
     into the spark-dist folder of the build directory. -->
<plugin>
  <groupId>com.googlecode.maven-download-plugin</groupId>
  <artifactId>download-maven-plugin</artifactId>
  <version>1.2.1</version>
  <executions>
    <execution>
      <id>download-pyspark-files</id>
      <goals>
        <goal>wget</goal>
      </goals>
      <phase>validate</phase>
      <configuration>
        <outputDirectory>${project.build.directory}/spark-dist</outputDirectory>
        <url>${spark.download.url}</url>
        <unpack>true</unpack>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- Extra directories removed by "mvn clean": the unpacked py4j sources under
     ../python/build and the downloaded Spark distribution. -->
<plugin>
  <artifactId>maven-clean-plugin</artifactId>
  <configuration>
    <filesets>
      <fileset>
        <directory>${basedir}/../python/build</directory>
      </fileset>
      <fileset>
        <!-- Was misspelled "project.build.direcoty", which Maven cannot interpolate. -->
        <directory>${project.build.directory}/spark-dist</directory>
      </fileset>
    </filesets>
  </configuration>
</plugin>
<!-- During "generate-resources", rebuild ../python from the unpacked Spark distribution:
     wipe it, copy Spark's python directory in, unpack py4j and zip the pyspark sources. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-antrun-plugin</artifactId>
  <version>1.7</version>
  <executions>
    <execution>
      <id>download-and-zip-pyspark-files</id>
      <goals>
        <goal>run</goal>
      </goals>
      <phase>generate-resources</phase>
      <configuration>
        <target>
          <!-- Ant tasks run in document order; each step relies on the previous one. -->
          <delete dir="../python"/>
          <copy todir="../python">
            <fileset dir="${project.build.directory}/spark-dist/spark-${spark.version}/python"/>
          </copy>
          <unzip dest="../python/build"
                 src="../python/lib/py4j-0.8.2.1-src.zip"/>
          <zip basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python"
               includes="pyspark/*.py,pyspark/**/*.py"
               destfile="${project.build.directory}/../../python/lib/pyspark.zip"/>
        </target>
      </configuration>
    </execution>
  </executions>
</plugin>

<!-- Plugin to compile Scala code -->
<plugin>
<groupId>org.scala-tools</groupId>
Expand Down
3 changes: 3 additions & 0 deletions zeppelin-distribution/src/assemble/distribution.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
<fileSet>
<directory>../notebook</directory>
</fileSet>
<!-- Adds the ../python directory (relative to this module) to the assembled distribution. -->
<fileSet>
<directory>../python</directory>
</fileSet>
</fileSets>
<!--<fileSet>
<directory>zeppelin-cli/target</directory>
Expand Down

0 comments on commit 64b8195

Please sign in to comment.