Skip to content

Commit

Permalink
[SPARK-27610][YARN] Shade netty native libraries
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Fixed the `spark-<version>-yarn-shuffle.jar` artifact packaging to shade the native netty libraries:
- shade the `META-INF/native/libnetty_*` native libraries when packaging
the yarn shuffle service jar. This is required because the netty library loader
derives the native library name from the shaded package name.
- updated the `org/spark_project` shade package prefix to `org/sparkproject`
(i.e. removed underscore) as the former breaks the netty native lib loading.

Without these changes, the yarn external shuffle service failed
when spark.shuffle.io.mode=EPOLL.

## How was this patch tested?
Manual tests

Closes #24502 from amuraru/SPARK-27610_master.

Authored-by: Adi Muraru <amuraru@adobe.com>
Signed-off-by: Marcelo Vanzin <vanzin@cloudera.com>
  • Loading branch information
amuraru authored and Marcelo Vanzin committed May 7, 2019
1 parent d124ce9 commit 8ef4da7
Show file tree
Hide file tree
Showing 18 changed files with 68 additions and 21 deletions.
46 changes: 45 additions & 1 deletion common/network-yarn/pom.xml
Expand Up @@ -35,7 +35,7 @@
<!-- Make sure all Hadoop dependencies are provided to avoid repackaging. -->
<hadoop.deps.scope>provided</hadoop.deps.scope>
<shuffle.jar>${project.build.directory}/scala-${scala.binary.version}/spark-${project.version}-yarn-shuffle.jar</shuffle.jar>
<shade>org/spark_project/</shade>
<shade>org/sparkproject/</shade>
</properties>

<dependencies>
Expand Down Expand Up @@ -128,6 +128,50 @@
</execution>
</executions>
</plugin>
<!--
Shade the native netty libs as well.
This execution defines the property spark.shade.native.packageName by taking
${spark.shade.packageName} and replacing "." with "_"
(e.g. org.sparkproject becomes org_sparkproject). The antrun execution below
uses that property as the file-name prefix of the renamed netty native libraries.
-->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>regex-property</id>
<goals>
<goal>regex-property</goal>
</goals>
<configuration>
<!-- Name of the property that receives the transformed value. -->
<name>spark.shade.native.packageName</name>
<!-- Input string: the package prefix used when relocating shaded classes. -->
<value>${spark.shade.packageName}</value>
<!-- The dot is escaped so that it matches a literal "." rather than any character. -->
<regex>\.</regex>
<replacement>_</replacement>
<!-- Abort the build if the shade package name contains no "." to replace. -->
<failIfNoMatch>true</failIfNoMatch>
</configuration>
</execution>
</executions>
</plugin>
<!--
Renames the netty native libraries inside the yarn shuffle jar. During the
package phase this unpacks ${shuffle.jar}, prefixes the netty epoll/kqueue
native library file names with ${spark.shade.native.packageName}, and writes
the jar back to the same path.
NOTE(review): presumably this must run after the execution that produces
${shuffle.jar}; confirm the plugin ordering within the package phase.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>unpack</id>
<phase>package</phase>
<configuration>
<target>
<echo message="Shade netty native libraries to ${spark.shade.native.packageName}" />
<!-- Explode the jar so the native library files can be renamed on disk. -->
<unzip src="${shuffle.jar}" dest="${project.build.directory}/exploded/" />
<!--
Only the x86_64 epoll (.so) and kqueue (.jnilib) libraries are renamed here.
NOTE(review): Ant's move task fails by default when the source file is
missing; confirm both files are always present in the jar.
-->
<move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_epoll_x86_64.so"
tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_epoll_x86_64.so" />
<move file="${project.build.directory}/exploded/META-INF/native/libnetty_transport_native_kqueue_x86_64.jnilib"
tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_kqueue_x86_64.jnilib" />
<!--
Rebuild the jar at its original path from the exploded directory.
NOTE(review): no manifest attribute is given; verify that the resulting
META-INF/MANIFEST.MF is the intended one.
-->
<jar destfile="${shuffle.jar}" basedir="${project.build.directory}/exploded" />
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>

<!-- probes to validate that those dependencies which must be shaded are -->
<plugin>
Expand Down
4 changes: 2 additions & 2 deletions conf/log4j.properties.template
Expand Up @@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.apache.spark.repl.Main=WARN

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
Expand Down
Expand Up @@ -28,8 +28,8 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.apache.spark.repl.Main=WARN

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

Expand Down
2 changes: 1 addition & 1 deletion core/src/test/resources/log4j.properties
Expand Up @@ -33,4 +33,4 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%t: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
4 changes: 2 additions & 2 deletions external/kinesis-asl/src/main/resources/log4j.properties
Expand Up @@ -31,7 +31,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.sparkproject.jetty=WARN
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
2 changes: 1 addition & 1 deletion external/kinesis-asl/src/test/resources/log4j.properties
Expand Up @@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
2 changes: 1 addition & 1 deletion graphx/src/test/resources/log4j.properties
Expand Up @@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
2 changes: 1 addition & 1 deletion launcher/src/test/resources/log4j.properties
Expand Up @@ -34,4 +34,4 @@ log4j.logger.org.apache.spark.launcher.app.outputredirtest=INFO, outputredirtest
log4j.logger.org.apache.spark.launcher.app.outputredirtest.additivity=false

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
2 changes: 1 addition & 1 deletion mllib/src/test/resources/log4j.properties
Expand Up @@ -24,5 +24,5 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -202,7 +202,7 @@
<test.include.tags></test.include.tags>

<!-- Package to use when relocating shaded classes. -->
<spark.shade.packageName>org.spark_project</spark.shade.packageName>
<spark.shade.packageName>org.sparkproject</spark.shade.packageName>

<!-- Modules that copy jars to the build directory should do so under this location. -->
<jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
Expand Down
5 changes: 4 additions & 1 deletion project/MimaExcludes.scala
Expand Up @@ -441,7 +441,9 @@ object MimaExcludes {
// [SPARK-15526][ML][FOLLOWUP] Make JPMML provided scope to avoid including unshaded JARs
(problem: Problem) => problem match {
case MissingClassProblem(cls) =>
!cls.fullName.startsWith("org.spark_project.jpmml") &&
!cls.fullName.startsWith("org.sparkproject.jpmml") &&
!cls.fullName.startsWith("org.sparkproject.dmg.pmml") &&
!cls.fullName.startsWith("org.spark_project.jpmml") &&
!cls.fullName.startsWith("org.spark_project.dmg.pmml")
case _ => true
}
Expand Down Expand Up @@ -716,6 +718,7 @@ object MimaExcludes {
ProblemFilters.exclude[Problem]("org.apache.spark.rpc.*"),
ProblemFilters.exclude[Problem]("org.spark-project.jetty.*"),
ProblemFilters.exclude[Problem]("org.spark_project.jetty.*"),
ProblemFilters.exclude[Problem]("org.sparkproject.jetty.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.internal.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.unused.*"),
ProblemFilters.exclude[Problem]("org.apache.spark.unsafe.*"),
Expand Down
2 changes: 1 addition & 1 deletion repl/src/test/resources/log4j.properties
Expand Up @@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
Expand Up @@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
Expand Up @@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
2 changes: 1 addition & 1 deletion resource-managers/yarn/src/test/resources/log4j.properties
Expand Up @@ -28,4 +28,4 @@ log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
Expand Up @@ -52,7 +52,7 @@ abstract class BaseYarnClusterSuite
|log4j.logger.org.apache.hadoop=WARN
|log4j.logger.org.eclipse.jetty=WARN
|log4j.logger.org.mortbay=WARN
|log4j.logger.org.spark_project.jetty=WARN
|log4j.logger.org.sparkproject.jetty=WARN
""".stripMargin

private var yarnCluster: MiniYARNCluster = _
Expand Down
2 changes: 1 addition & 1 deletion sql/catalyst/src/test/resources/log4j.properties
Expand Up @@ -24,4 +24,4 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN
2 changes: 1 addition & 1 deletion streaming/src/test/resources/log4j.properties
Expand Up @@ -24,5 +24,5 @@ log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.sparkproject.jetty=WARN

0 comments on commit 8ef4da7

Please sign in to comment.