Skip to content

Commit

Permalink
zip pyspark archives
Browse files Browse the repository at this point in the history
  • Loading branch information
lianhuiwang committed Apr 29, 2015
1 parent 5192cca commit f11f84a
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
36 changes: 35 additions & 1 deletion assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,27 @@
<skip>true</skip>
</configuration>
</plugin>
<!-- Zip the PySpark sources into python/lib/pyspark.zip during the package phase so that
     Python applications can be run in YARN mode. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-antrun-plugin</artifactId>
  <executions>
    <execution>
      <phase>package</phase>
      <goals>
        <goal>run</goal>
      </goals>
    </execution>
  </executions>
  <configuration>
    <target>
      <!-- pyspark.zip is a regular file, so the stale archive has to be removed with the
           "file" attribute; Ant's "dir" attribute only applies to directories. -->
      <delete file="${basedir}/../python/lib/pyspark.zip"/>
      <!-- The fileset is rooted at python/, so archive entries are named pyspark/... -->
      <zip destfile="${basedir}/../python/lib/pyspark.zip">
        <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
      </zip>
    </target>
  </configuration>
</plugin>
<!-- Use the shade plugin to create a big JAR with all the dependencies -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand Down Expand Up @@ -196,6 +217,19 @@
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<executions>
<!--execution>
<id>pyspark-zip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<skipAssembly>true</skipAssembly>
<descriptors>
<descriptor>src/main/assembly/pyspark-assembly.xml</descriptor>
</descriptors>
</configuration>
</execution-->
<execution>
<id>dist</id>
<phase>package</phase>
Expand All @@ -208,7 +242,7 @@
</descriptors>
</configuration>
</execution>
</executions>
</executions>
</plugin>
</plugins>
</build>
Expand Down
1 change: 0 additions & 1 deletion make-distribution.sh
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
# Copy the README and the bin/ and python/ trees into the distribution directory.
cp "$SPARK_HOME/README.md" "$DISTDIR"
cp -r "$SPARK_HOME/bin" "$DISTDIR"
cp -r "$SPARK_HOME/python" "$DISTDIR"
# NOTE(review): this zips "$SPARK_HOME"/python/lib/pyspark, whereas assembly/pom.xml and
# project/SparkBuild.scala take the PySpark sources from python/pyspark - confirm the path.
# NOTE(review): the input is given as a full path, so entry names presumably carry that
# path rather than starting at pyspark/ - verify the resulting archive layout.
zip -r "$DISTDIR"/python/lib/pyspark.zip "$SPARK_HOME"/python/lib/pyspark
# Copy the sbin/ and ec2/ trees into the distribution directory as well.
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
cp -r "$SPARK_HOME/ec2" "$DISTDIR"

Expand Down
12 changes: 10 additions & 2 deletions project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -361,12 +361,20 @@ object PySparkAssembly {
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
  // Root of the PySpark sources. Declared exactly once and shared by the zip step and
  // the copy step below.
  val src = new File(BuildCommons.sparkHome, "python/pyspark")

  // Drop any previous archive so that pyspark.zip is rebuilt from scratch.
  val zipFile = new File(BuildCommons.sparkHome, "python/lib/pyspark.zip")
  IO.delete(zipFile)

  // Lists `f` followed by everything beneath it, recursively.
  def entries(f: File): List[File] =
    f :: (if (f.isDirectory) IO.listFiles(f).toList.flatMap(entries) else Nil)

  // Entry names are relative to the parent of `src`, so they start with "pyspark".
  // The prefix length is measured on the parent's absolute path so that it always lines
  // up with `getAbsolutePath` below, even if `sparkHome` is a relative path.
  val prefixLength = src.getParentFile.getAbsolutePath.length + 1
  val zipEntries = entries(src).map { f =>
    (f, f.getAbsolutePath.substring(prefixLength))
  }
  IO.zip(zipEntries, zipFile)

  // Make sure the destination directory exists; fail the build if it cannot be created.
  val dst = new File(outDir, "pyspark")
  if (!dst.isDirectory()) {
    require(dst.mkdirs())
  }

  // `copy` is defined elsewhere in this file; its result is the value of this generator.
  copy(src, dst)
}
)
Expand Down

0 comments on commit f11f84a

Please sign in to comment.