[ZEPPELIN-3810] Support Spark 2.4 #3206

Closed · wants to merge 3 commits
10 changes: 5 additions & 5 deletions .travis.yml
@@ -20,7 +20,7 @@ sudo: false

before_cache:
- sudo chown -R travis:travis $HOME/.m2

cache:
apt: true
directories:
@@ -98,15 +98,15 @@ matrix:
dist: trusty
env: BUILD_PLUGINS="true" PYTHON="3" SCALA_VER="2.10" PROFILE="-Pspark-1.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl zeppelin-zengine,spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=SparkIntegrationTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false"

# Test spark module for 2.1.0 with scala 2.11
# Test spark module for 2.4.0 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.1 -Phadoop2 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.4 -Phadoop2 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"

# Test spark module for 2.0.2 with scala 2.11
# Test spark module for 2.3.2 with scala 2.11
- jdk: "oraclejdk8"
dist: trusty
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.0 -Phadoop3 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"
env: BUILD_PLUGINS="false" PYTHON="2" SCALA_VER="2.11" PROFILE="-Pspark-2.3 -Phadoop3 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat -am" TEST_FLAG="test -DskipRat -am" MODULES="-pl spark/interpreter,spark/spark-dependencies" TEST_PROJECTS="-Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.rinterpreter.*,org.apache.spark.api.r.* -DfailIfNoTests=false"

# Test python/pyspark with python 2, livy 0.5
- sudo: required
55 changes: 48 additions & 7 deletions spark/interpreter/pom.xml
@@ -53,7 +53,7 @@
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
<pyspark.test.include>**/*Test.*</pyspark.test.include>


</properties>

<dependencies>
@@ -69,12 +69,6 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>zeppelin-interpreter-api</artifactId>
@@ -609,4 +603,51 @@
</plugins>
</build>

<profiles>

<profile>
<id>spark-2.2</id>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>

<profile>
<id>spark-2.1</id>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>

<profile>
<id>spark-2.0</id>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>

<profile>
<id>spark-1.6</id>
<dependencies>
<dependency>
<groupId>org.apache.zeppelin</groupId>
<artifactId>spark-scala-2.10</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>

</profiles>
</project>
@@ -31,9 +31,10 @@ public class SparkVersion {
public static final SparkVersion SPARK_2_3_0 = SparkVersion.fromVersionString("2.3.0");
public static final SparkVersion SPARK_2_3_1 = SparkVersion.fromVersionString("2.3.1");
public static final SparkVersion SPARK_2_4_0 = SparkVersion.fromVersionString("2.4.0");
public static final SparkVersion SPARK_3_0_0 = SparkVersion.fromVersionString("3.0.0");

public static final SparkVersion MIN_SUPPORTED_VERSION = SPARK_1_6_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_2_4_0;
public static final SparkVersion UNSUPPORTED_FUTURE_VERSION = SPARK_3_0_0;

private int version;
private String versionString;
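For context, the two constants above bound the range of Spark versions Zeppelin accepts: a version is supported when it is at least MIN_SUPPORTED_VERSION and strictly below UNSUPPORTED_FUTURE_VERSION, so raising the latter to 3.0.0 is what lets 2.4.x through. Below is a minimal, self-contained sketch of that gate; the parsing and helper names are illustrative assumptions, not the actual SparkVersion API.

```scala
// Sketch only: the real SparkVersion class may parse and compare versions differently.
object SparkVersionGateSketch {

  // Pack "major.minor.patch" into one comparable int, dropping qualifiers like "-SNAPSHOT".
  def toNumber(versionString: String): Int = {
    val Array(major, minor, patch) =
      versionString.split("-").head.split("\\.").take(3).map(_.toInt)
    major * 10000 + minor * 100 + patch
  }

  val minSupported      = toNumber("1.6.0")  // MIN_SUPPORTED_VERSION
  val unsupportedFuture = toNumber("3.0.0")  // UNSUPPORTED_FUTURE_VERSION after this change

  def isUnsupported(versionString: String): Boolean = {
    val v = toNumber(versionString)
    v < minSupported || v >= unsupportedFuture
  }

  def main(args: Array[String]): Unit = {
    assert(!isUnsupported("2.4.0")) // newly inside the supported range
    assert(isUnsupported("3.0.0"))  // still flagged as an unsupported future version
    assert(isUnsupported("1.5.2"))  // below the minimum supported version
  }
}
```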
42 changes: 31 additions & 11 deletions spark/pom.xml
@@ -49,17 +49,16 @@

<spark.archive>spark-${spark.version}</spark.archive>
<spark.src.download.url>
http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz
https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
</spark.src.download.url>
<spark.bin.download.url>
http://d3kbcqa49mib13.cloudfront.net/${spark.archive}-bin-without-hadoop.tgz
https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}-bin-without-hadoop.tgz
</spark.bin.download.url>
</properties>

<modules>
<module>interpreter</module>
<module>spark-scala-parent</module>
<module>scala-2.10</module>
<module>scala-2.11</module>
<module>spark-dependencies</module>
<module>spark-shims</module>
@@ -192,32 +191,47 @@

<profiles>

<profile>
<id>spark-2.4</id>
Member Author: Hm, I thought this profile was not meant to be used for building. I referred to #2880.

Member Author: By the way, I am verifying this in my personal Travis CI as well (https://travis-ci.org/HyukjinKwon/zeppelin/builds/442994923).

Contributor: This is not necessary for building, but it is necessary for running the unit tests against Spark 2.4 in Travis.

Member Author: @zjffdu, what Travis update should I make? Should I build this against spark-2.2, have it download Spark 2.4.0, and test against it after setting SPARK_HOME, as I did?

Contributor (zjffdu, Nov 8, 2018): @HyukjinKwon Could you update .travis.yml to add a test matrix entry for Spark 2.4?

Member Author: I tried, but it seems I don't know how to; 2.3 does not appear to be tested either.

<properties>
<spark.version>2.4.0</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<py4j.version>0.10.7</py4j.version>
</properties>
</profile>

<profile>
<id>spark-2.3</id>
<properties>
<spark.version>2.3.0</spark.version>
<spark.version>2.3.2</spark.version>
<protobuf.version>2.5.0</protobuf.version>
<spark.py4j.version>0.10.6</spark.py4j.version>
<py4j.version>0.10.7</py4j.version>
</properties>
</profile>

<profile>
<id>spark-2.2</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>2.2.0</spark.version>
<spark.version>2.2.1</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<modules>
<module>scala-2.10</module>
</modules>
</profile>

<profile>
<id>spark-2.1</id>
<properties>
<spark.version>2.1.0</spark.version>
<spark.version>2.1.2</spark.version>
<py4j.version>0.10.4</py4j.version>
</properties>
<modules>
<module>scala-2.10</module>
</modules>
</profile>

<profile>
@@ -226,6 +240,9 @@
<spark.version>2.0.2</spark.version>
<py4j.version>0.10.3</py4j.version>
</properties>
<modules>
<module>scala-2.10</module>
</modules>
</profile>

<profile>
@@ -234,7 +251,10 @@
<spark.version>1.6.3</spark.version>
<py4j.version>0.9</py4j.version>
</properties>
<modules>
<module>scala-2.10</module>
</modules>
</profile>

</profiles>
</project>
@@ -39,6 +39,8 @@ class SparkScala211Interpreter(override val conf: SparkConf,
override val printReplOutput: java.lang.Boolean)
extends BaseSparkScalaInterpreter(conf, depFiles, printReplOutput) {

import SparkScala211Interpreter._

lazy override val LOGGER: Logger = LoggerFactory.getLogger(getClass)

private var sparkILoop: ILoop = _
@@ -81,7 +83,7 @@ class SparkScala211Interpreter(override val conf: SparkConf,

sparkILoop.in = reader
sparkILoop.initializeSynchronous()
callMethod(sparkILoop, "scala$tools$nsc$interpreter$ILoop$$loopPostInit")
loopPostInit(this)
this.scalaCompleter = reader.completion.completer()

createSparkContext()
@@ -105,3 +107,72 @@ class SparkScala211Interpreter(override val conf: SparkConf,
sparkILoop.interpret(code)

}

private object SparkScala211Interpreter {

/**
* This is a hack to call `loopPostInit` on `ILoop`. In newer versions of Scala such
* as 2.11.12, `loopPostInit` became a nested function and is no longer accessible. Here,
* we redefine `loopPostInit` as it was in Scala 2.11.8 and skip the `loadInitFiles` call
* introduced in Scala 2.11.12, since we do not need to load files here.
*
* Both `loopPostInit` and `unleashAndSetPhase` are redefined, while `phaseCommand` and
* `asyncMessage` are invoked via reflection since both exist in Scala 2.11.8 and 2.11.12.
*
* Please see the code below:
* https://github.com/scala/scala/blob/v2.11.8/src/repl/scala/tools/nsc/interpreter/ILoop.scala
* https://github.com/scala/scala/blob/v2.11.12/src/repl/scala/tools/nsc/interpreter/ILoop.scala
*
* See also ZEPPELIN-3810.
*/
private def loopPostInit(interpreter: SparkScala211Interpreter): Unit = {
import StdReplTags._
import scala.reflect.classTag
import scala.reflect.io

val sparkILoop = interpreter.sparkILoop
val intp = sparkILoop.intp
val power = sparkILoop.power
val in = sparkILoop.in

def loopPostInit() {
// Bind intp somewhere out of the regular namespace where
// we can get at it in generated code.
intp.quietBind(NamedParam[IMain]("$intp", intp)(tagOfIMain, classTag[IMain]))
// Auto-run code via some setting.
(replProps.replAutorunCode.option
flatMap (f => io.File(f).safeSlurp())
foreach (intp quietRun _)
)
// classloader and power mode setup
intp.setContextClassLoader()
if (isReplPower) {
replProps.power setValue true
unleashAndSetPhase()
asyncMessage(power.banner)
}
// SI-7418 Now, and only now, can we enable TAB completion.
in.postInit()
}

def unleashAndSetPhase() = if (isReplPower) {
power.unleash()
intp beSilentDuring phaseCommand("typer") // Set the phase to "typer"
}

def phaseCommand(name: String): Results.Result = {
interpreter.callMethod(
sparkILoop,
"scala$tools$nsc$interpreter$ILoop$$phaseCommand",
Array(classOf[String]),
Array(name)).asInstanceOf[Results.Result]
}

def asyncMessage(msg: String): Unit = {
interpreter.callMethod(
sparkILoop, "asyncMessage", Array(classOf[String]), Array(msg))
}

loopPostInit()
}
}
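The hack above leans on a callMethod reflection helper that is presumably defined in BaseSparkScalaInterpreter and does not appear in this diff. A rough sketch of what such a helper might look like follows, with the signature inferred from the call sites above; treat it as an assumption, not the actual implementation.

```scala
import java.lang.reflect.Method

object ReflectionHelperSketch {

  // Invoke a (possibly name-mangled or non-public) method on `obj` by name.
  // The class hierarchy is walked so that methods declared on a superclass, such as
  // ILoop's asyncMessage, are found when `obj` is an instance of a subclass.
  def callMethod(obj: AnyRef, name: String,
                 parameterTypes: Array[Class[_]] = Array.empty,
                 parameters: Array[AnyRef] = Array.empty): AnyRef = {
    var clazz: Class[_] = obj.getClass
    while (clazz != null) {
      try {
        val method: Method = clazz.getDeclaredMethod(name, parameterTypes: _*)
        method.setAccessible(true)
        return method.invoke(obj, parameters: _*)
      } catch {
        case _: NoSuchMethodException => clazz = clazz.getSuperclass
      }
    }
    throw new NoSuchMethodException(s"$name not found on ${obj.getClass.getName}")
  }
}
```

With a helper along these lines, the phaseCommand and asyncMessage calls above resolve against the ILoop instance in both Scala 2.11.8 and 2.11.12, as the comment in the hack describes.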
2 changes: 1 addition & 1 deletion zeppelin-distribution/src/bin_license/LICENSE
@@ -291,7 +291,7 @@ The text of each license is also included at licenses/LICENSE-[project]-[version
(BSD Style) JSch v0.1.53 (http://www.jcraft.com) - http://www.jcraft.com/jsch/LICENSE.txt
(BSD 3 Clause) highlightjs v9.4.0 (https://highlightjs.org/) - https://github.com/isagalaev/highlight.js/blob/9.4.0/LICENSE
(BSD 3 Clause) hamcrest v1.3 (http://hamcrest.org/JavaHamcrest/) - http://opensource.org/licenses/BSD-3-Clause
(BSD Style) JLine v2.12.1 (https://github.com/jline/jline2) - https://github.com/jline/jline2/blob/master/LICENSE.txt
(BSD Style) JLine v2.14.3 (https://github.com/jline/jline2) - https://github.com/jline/jline2/blob/master/LICENSE.txt
(BSD New license) Google Auth Library for Java - Credentials (com.google.auth:google-auth-library-credentials:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-credentials)
(BSD New license) Google Auth Library for Java - OAuth2 HTTP (com.google.auth:google-auth-library-oauth2-http:0.4.0 - https://github.com/google/google-auth-library-java/google-auth-library-oauth2-http)
(New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java-util:3.0.0-beta-2 - https://developers.google.com/protocol-buffers/)
2 changes: 1 addition & 1 deletion zeppelin-interpreter/pom.xml
@@ -43,7 +43,7 @@
<aether.version>1.12</aether.version>
<maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
<wagon.version>1.0</wagon.version>
<jline.version>2.12.1</jline.version>
<jline.version>2.14.3</jline.version>
<atomix.version>3.0.0-rc4</atomix.version>
<commons-math3.version>3.1.1</commons-math3.version>
<guava.version>20.0</guava.version>
@@ -64,7 +64,7 @@ public class ZeppelinSparkClusterTest extends AbstractTestRestApi {
//ci timeout.
//TODO(zjffdu) remove this after we upgrade it to junit 4.13 (ZEPPELIN-3341)
private static Set<String> verifiedSparkVersions = new HashSet<>();


private String sparkVersion;
private AuthenticationInfo anonymous = new AuthenticationInfo("anonymous");
@@ -83,10 +83,12 @@ public ZeppelinSparkClusterTest(String sparkVersion) throws Exception {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
{"2.2.1"},
{"2.1.2"},
{"2.0.2"},
{"1.6.3"}
{"2.4.0"},
{"2.3.2"},
{"2.2.1"},
{"2.1.2"},
{"2.0.2"},
{"1.6.3"}
});
}

@@ -42,10 +42,12 @@ public SparkIntegrationTest(String sparkVersion) {
@Parameterized.Parameters
public static List<Object[]> data() {
return Arrays.asList(new Object[][]{
{"2.2.1"},
{"2.1.2"},
{"2.0.2"},
{"1.6.3"}
{"2.4.0"},
{"2.3.2"},
{"2.2.1"},
{"2.1.2"},
{"2.0.2"},
{"1.6.3"}
});
}
