From 7b596ea5c28574647a93f3ed3cfb486bbb05cf2e Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Wed, 28 Sep 2016 23:23:57 -0400 Subject: [PATCH 01/36] Generalize SparkOutputStream --- .../apache/zeppelin/spark/PySparkInterpreter.java | 5 +++-- .../org/apache/zeppelin/spark/SparkInterpreter.java | 5 +++-- .../java/org/apache/zeppelin/spark/ZeppelinR.java | 5 +++-- .../interpreter/util/InterpreterOutputStream.java | 13 ++++++++----- .../zeppelin/interpreter/util}/LogOutputStream.java | 3 +-- 5 files changed, 18 insertions(+), 13 deletions(-) rename spark/src/main/java/org/apache/zeppelin/spark/SparkOutputStream.java => zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/InterpreterOutputStream.java (89%) rename {spark/src/main/java/org/apache/zeppelin/spark => zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util}/LogOutputStream.java (98%) diff --git a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java index 582cb6b06c5..13407b22a3c 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/PySparkInterpreter.java @@ -55,6 +55,7 @@ import org.apache.zeppelin.interpreter.WrappedInterpreter; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; +import org.apache.zeppelin.interpreter.util.InterpreterOutputStream; import org.apache.zeppelin.spark.dep.SparkDependencyContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,7 +72,7 @@ public class PySparkInterpreter extends Interpreter implements ExecuteResultHand private GatewayServer gatewayServer; private DefaultExecutor executor; private int port; - private SparkOutputStream outputStream; + private InterpreterOutputStream outputStream; private BufferedWriter ins; private PipedInputStream in; private ByteArrayOutputStream 
input; @@ -196,7 +197,7 @@ private void createGatewayServerAndStartScript() { cmd.addArgument(Integer.toString(port), false); cmd.addArgument(Integer.toString(getSparkInterpreter().getSparkVersion().toNumber()), false); executor = new DefaultExecutor(); - outputStream = new SparkOutputStream(logger); + outputStream = new InterpreterOutputStream(logger); PipedOutputStream ps = new PipedOutputStream(); in = null; try { diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java index 53bf30b9533..41e83ef6f6b 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/SparkInterpreter.java @@ -55,6 +55,7 @@ import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.InterpreterUtils; import org.apache.zeppelin.interpreter.WrappedInterpreter; +import org.apache.zeppelin.interpreter.util.InterpreterOutputStream; import org.apache.zeppelin.resource.ResourcePool; import org.apache.zeppelin.resource.WellKnownResourceName; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; @@ -110,7 +111,7 @@ public class SparkInterpreter extends Interpreter { private static Integer sharedInterpreterLock = new Integer(0); private static AtomicInteger numReferenceOfSparkContext = new AtomicInteger(0); - private SparkOutputStream out; + private InterpreterOutputStream out; private SparkDependencyResolver dep; /** @@ -126,7 +127,7 @@ public class SparkInterpreter extends Interpreter { public SparkInterpreter(Properties property) { super(property); - out = new SparkOutputStream(logger); + out = new InterpreterOutputStream(logger); } public SparkInterpreter(Properties property, SparkContext sc) { diff --git a/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinR.java b/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinR.java index 961793db17e..98c6de301fc 100644 --- 
a/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinR.java +++ b/spark/src/main/java/org/apache/zeppelin/spark/ZeppelinR.java @@ -22,6 +22,7 @@ import org.apache.zeppelin.interpreter.InterpreterException; import org.apache.zeppelin.interpreter.InterpreterOutput; import org.apache.zeppelin.interpreter.InterpreterOutputListener; +import org.apache.zeppelin.interpreter.util.InterpreterOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,7 +39,7 @@ public class ZeppelinR implements ExecuteResultHandler { private final String rCmdPath; private final SparkVersion sparkVersion; private DefaultExecutor executor; - private SparkOutputStream outputStream; + private InterpreterOutputStream outputStream; private PipedOutputStream input; private final String scriptPath; private final String libPath; @@ -146,7 +147,7 @@ public void open() throws IOException { logger.debug(cmd.toString()); executor = new DefaultExecutor(); - outputStream = new SparkOutputStream(logger); + outputStream = new InterpreterOutputStream(logger); input = new PipedOutputStream(); PipedInputStream in = new PipedInputStream(input); diff --git a/spark/src/main/java/org/apache/zeppelin/spark/SparkOutputStream.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/InterpreterOutputStream.java similarity index 89% rename from spark/src/main/java/org/apache/zeppelin/spark/SparkOutputStream.java rename to zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/InterpreterOutputStream.java index e454994aa0d..b6f01b1a7ca 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/SparkOutputStream.java +++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/InterpreterOutputStream.java @@ -14,7 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.zeppelin.spark; + +package org.apache.zeppelin.interpreter.util; import org.apache.zeppelin.interpreter.InterpreterOutput; import org.slf4j.Logger; @@ -22,14 +23,15 @@ import java.io.IOException; /** - * InterpreterOutput can be attached / detached. + * Output Stream integrated with InterpreterOutput. + * + * Can be used to channel output from interpreters. */ -public class SparkOutputStream extends LogOutputStream { - +public class InterpreterOutputStream extends LogOutputStream { public static Logger logger; InterpreterOutput interpreterOutput; - public SparkOutputStream(Logger logger) { + public InterpreterOutputStream(Logger logger) { this.logger = logger; } @@ -78,6 +80,7 @@ public void close() throws IOException { } } + @Override public void flush() throws IOException { super.flush(); diff --git a/spark/src/main/java/org/apache/zeppelin/spark/LogOutputStream.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/LogOutputStream.java similarity index 98% rename from spark/src/main/java/org/apache/zeppelin/spark/LogOutputStream.java rename to zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/LogOutputStream.java index d941cd772c6..e77f441f90d 100644 --- a/spark/src/main/java/org/apache/zeppelin/spark/LogOutputStream.java +++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/util/LogOutputStream.java @@ -15,13 +15,12 @@ * limitations under the License. */ -package org.apache.zeppelin.spark; +package org.apache.zeppelin.interpreter.util; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; - /** * Minor modification of LogOutputStream of apache commons exec. 
* LogOutputStream of apache commons exec has one issue that method flush doesn't throw IOException, From 38abaf9e04ebc4c1797d22f9f6ac1fc4f11feb46 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Wed, 28 Sep 2016 00:36:20 -0400 Subject: [PATCH 02/36] Add Scio interpreter --- conf/interpreter-list | 1 + conf/zeppelin-site.xml.template | 2 +- pom.xml | 10 +- scio/README.md | 18 + scio/pom.xml | 341 ++++++++++++++++++ .../main/resources/interpreter-setting.json | 19 + .../zeppelin/scio/ScioInterpreter.scala | 165 +++++++++ .../zeppelin/conf/ZeppelinConfiguration.java | 5 +- 8 files changed, 557 insertions(+), 4 deletions(-) create mode 100644 scio/README.md create mode 100644 scio/pom.xml create mode 100644 scio/src/main/resources/interpreter-setting.json create mode 100644 scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala diff --git a/conf/interpreter-list b/conf/interpreter-list index 38cb386d8cd..c22afef8a65 100644 --- a/conf/interpreter-list +++ b/conf/interpreter-list @@ -35,4 +35,5 @@ md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter +scio org.apache.zeppelin:zeppelin-scio:0.6.1 Scio interpreter shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template index c4b369c301c..36f7e190dfb 100755 --- a/conf/zeppelin-site.xml.template +++ b/conf/zeppelin-site.xml.template @@ -190,7 +190,7 @@ zeppelin.interpreters - 
org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter, org.apache.zeppelin.pig.PigQueryInterpreter + 
org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter,org.apache.zeppelin.scio.ScioInterpreter Comma separated interpreter configurations. 
First interpreter become a default diff --git a/pom.xml b/pom.xml index c9b4e0214a2..c9abfbd1522 100644 --- a/pom.xml +++ b/pom.xml @@ -75,6 +75,7 @@ elasticsearch bigquery alluxio + scio zeppelin-web zeppelin-server zeppelin-distribution @@ -589,7 +590,14 @@ beam - + + + scio + + scio + + + examples diff --git a/scio/README.md b/scio/README.md new file mode 100644 index 00000000000..b95c548b7fd --- /dev/null +++ b/scio/README.md @@ -0,0 +1,18 @@ +Scio interpreter for Apache Zeppelin +==================================== + +## Raison d'être: + +Provide Scio Interpreter for Zeppelin. + +## Build + +``` +mvn -pl zeppelin-interpreter,zeppelin-display,scio -DskipTests package +``` + +## Test + +``` +mvn -pl scio,zeppelin-display,zeppelin-interpreter -Dtest='org.apache.zeppelin.scio.*' -DfailIfNoTests=false test +``` diff --git a/scio/pom.xml b/scio/pom.xml new file mode 100644 index 00000000000..742c78ecf79 --- /dev/null +++ b/scio/pom.xml @@ -0,0 +1,341 @@ + + + + + 4.0.0 + + + zeppelin + org.apache.zeppelin + 0.7.0-SNAPSHOT + .. 
+ + + org.apache.zeppelin + zeppelin-scio_2.10 + jar + 0.7.0-SNAPSHOT + Zeppelin: Scio + Zeppelin Scio support + + + 0.2.3 + + + + + org.slf4j + slf4j-api + + + + org.slf4j + slf4j-log4j12 + + + + ${project.groupId} + zeppelin-display_${scala.binary.version} + ${project.version} + + + + ${project.groupId} + zeppelin-interpreter + ${project.version} + + + + com.spotify + scio-repl_${scala.binary.version} + ${scio.version} + + + + com.google.guava + guava + 14.0.1 + + + + + org.apache.maven + maven-plugin-api + 3.0 + + + org.codehaus.plexus + plexus-utils + + + org.sonatype.sisu + sisu-inject-plexus + + + org.apache.maven + maven-model + + + + + org.sonatype.aether + aether-api + 1.12 + + + org.sonatype.aether + aether-util + 1.12 + + + org.sonatype.aether + aether-impl + 1.12 + + + + org.apache.maven + maven-aether-provider + 3.0.3 + + + org.sonatype.aether + aether-api + + + org.sonatype.aether + aether-spi + + + org.sonatype.aether + aether-util + + + org.sonatype.aether + aether-impl + + + org.codehaus.plexus + plexus-utils + + + + + + org.sonatype.aether + aether-connector-file + 1.12 + + + + org.sonatype.aether + aether-connector-wagon + 1.12 + + + org.apache.maven.wagon + wagon-provider-api + + + + + + org.apache.maven.wagon + wagon-provider-api + 1.0 + + + org.codehaus.plexus + plexus-utils + + + + + + org.apache.maven.wagon + wagon-http-lightweight + 1.0 + + + org.apache.maven.wagon + wagon-http-shared + + + + + + org.apache.maven.wagon + wagon-http + 1.0 + + + + + + org.apache.commons + commons-exec + 1.3 + + + + org.scala-lang + scala-library + ${scala.version} + + + + org.scala-lang + scala-compiler + ${scala.version} + + + + org.scala-lang + scala-reflect + ${scala.version} + + + + commons-lang + commons-lang + + + + org.apache.commons + commons-compress + 1.9 + + + + + + + + maven-enforcer-plugin + 1.3.1 + + + enforce + none + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.17 + + 1 + false + -Xmx1024m -XX:MaxPermSize=256m + + + + + 
org.apache.maven.plugins + maven-shade-plugin + 2.3 + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + reference.conf + + + + + + package + + shade + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 2.8 + + + package + + copy + + + ${project.build.directory}/../../interpreter/scio + false + false + true + runtime + + + ${project.groupId} + ${project.artifactId} + ${project.version} + ${project.packaging} + + + + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + compile + + compile + + compile + + + test-compile + + testCompile + + test-compile + + + process-resources + + compile + + + + + + + diff --git a/scio/src/main/resources/interpreter-setting.json b/scio/src/main/resources/interpreter-setting.json new file mode 100644 index 00000000000..c98c32a786e --- /dev/null +++ b/scio/src/main/resources/interpreter-setting.json @@ -0,0 +1,19 @@ +[ + { + "group": "scio", + "name": "scio", + "className": "org.apache.zeppelin.scio.ScioInterpreter", + "defaultInterpreter": true, + "properties": { + "args": { + "envName": null, + "propertyName": null, + "defaultValue": "--runner=InProcessPipelineRunner", + "description": "Scio commandline args" + } + }, + "editor": { + "language": "scala" + } + } +] diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala new file mode 100644 index 00000000000..67276b251d3 --- /dev/null +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +import java.io.PrintStream +import java.util +import java.util.Properties + +import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner +import com.spotify.scio.repl.{ScioILoop, ScioReplClassLoader} +import org.apache.zeppelin.interpreter.Interpreter.FormType +import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion +import org.apache.zeppelin.interpreter.util.InterpreterOutputStream +import org.apache.zeppelin.interpreter.{Interpreter, InterpreterContext, InterpreterResult} +import org.slf4j.LoggerFactory + +import scala.reflect.io.File +import scala.tools.nsc.Settings +import scala.tools.nsc.interpreter.JPrintWriter +import scala.tools.nsc.util.ClassPath + +class ScioInterpreter(property: Properties) extends Interpreter(property) { + private val logger = LoggerFactory.getLogger(classOf[ScioInterpreter]) + private var REPL: ScioILoop = null + + val innerOut = new InterpreterOutputStream(logger) + + override def open(): Unit = { + val args: List[String] = Option(getProperty("args")) + .getOrElse(s"--runner=${classOf[InProcessPipelineRunner].getSimpleName}") + .split(" ") + .map(_.trim) + .toList + + val settings = new Settings() + + // For scala 2.10 - usejavacp + if (scala.util.Properties.versionString.contains("2.10.")) { + settings.classpath.append(System.getProperty("java.class.path")) + settings.usejavacp.value = true + } + + def classLoaderURLs(cl: ClassLoader): Array[java.net.URL] = cl match { + case null => Array() + case u: java.net.URLClassLoader => u.getURLs ++ 
classLoaderURLs(cl.getParent) + case _ => classLoaderURLs(cl.getParent) + } + + classLoaderURLs(Thread.currentThread().getContextClassLoader) + .foreach(u => settings.classpath.append(u.getPath)) + + // We have to make sure that scala macros are expandable. paradise plugin has to be added to + // -Xplugin paths. In case of assembly - paradise is included in assembly jar - thus we add + // itself to -Xplugin. If shell is started from sbt or classpath, paradise jar has to be in + // classpath, we find it and add it to -Xplugin. + + // Repl assembly includes paradise's scalac-plugin.xml - required for BigQuery macro + // There should be no harm if we keep this for sbt launch. + val thisJar = this.getClass.getProtectionDomain.getCodeSource.getLocation.getPath + // In some cases this may be `target/classes` + if(thisJar.endsWith(".jar")) { + settings.plugin.appendToValue(thisJar) + } + + ClassPath.split(settings.classpath.value) + .find(File(_).name.startsWith("paradise_")) + .foreach(settings.plugin.appendToValue) + + // Force the repl to be synchronous, so all cmds are executed in the same thread + settings.Yreplsync.value = true + + val scioClassLoader = new ScioReplClassLoader( + ClassPath.toURLs(settings.classpath.value).toArray ++ + classLoaderURLs(Thread.currentThread().getContextClassLoader), + null, + Thread.currentThread.getContextClassLoader) + + REPL = new ScioILoop(scioClassLoader, args, None, new JPrintWriter(innerOut)) + scioClassLoader.setRepl(REPL) + + // Set classloader chain - expose top level abstract class loader down + // the chain to allow for readObject and latestUserDefinedLoader + // See https://gist.github.com/harrah/404272 + settings.embeddedDefaults(scioClassLoader) + + // No need for bigquery dumps + sys.props("bigquery.plugin.disable.dump") = "true" + + REPL.settings_=(settings) + REPL.createInterpreter() + } + + override def close(): Unit = { + logger.info("Closing Scio interpreter!") + REPL.closeInterpreter() + } + + override def 
interpret(code: String, context: InterpreterContext): InterpreterResult = { + val paragraphId = context.getParagraphId + + val consoleOut = new PrintStream(innerOut) + System.setOut(consoleOut) + innerOut.setInterpreterOutput(context.out) + + try { + import tools.nsc.interpreter.Results._ + REPL.interpret(code) match { + case Success => { + logger.debug(s"Successfully executed `$code` in $paragraphId") + new InterpreterResult(InterpreterResult.Code.SUCCESS) + } + case Error => { + logger.error(s"Error executing `$code` in $paragraphId") + new InterpreterResult(InterpreterResult.Code.ERROR) + } + case Incomplete => { + logger.warn(s"Code `$code` not complete in $paragraphId") + new InterpreterResult(InterpreterResult.Code.INCOMPLETE, "Incomplete expression") + } + } + } catch { + case e: Exception => + logger.error("Interpreter exception", e) + new InterpreterResult(InterpreterResult.Code.ERROR, e.getMessage) + } finally { + innerOut.flush() + innerOut.setInterpreterOutput(null) + consoleOut.flush() + } + } + + override def cancel(context: InterpreterContext): Unit = { + // not implemented + } + + override def getFormType: FormType = { + FormType.NATIVE + } + + override def getProgress(context: InterpreterContext): Int = { + // progress tracking is not implemented - report 0 instead of an arbitrary value + 0 + } + + override def completion(buf: String, cursor: Int): util.List[InterpreterCompletion] = { + //TODO: implement, delegate? 
+ super.completion(buf, cursor) + } + +} diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java index b9a3bfc6780..d99c52950b8 100644 --- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java @@ -544,7 +544,8 @@ public static enum ConfVars { + "org.apache.zeppelin.jdbc.JDBCInterpreter," + "org.apache.zeppelin.hbase.HbaseInterpreter," + "org.apache.zeppelin.bigquery.BigQueryInterpreter," - + "org.apache.zeppelin.beam.BeamInterpreter"), + + "org.apache.zeppelin.beam.BeamInterpreter," + + "org.apache.zeppelin.scio.ScioInterpreter"), ZEPPELIN_INTERPRETER_JSON("zeppelin.interpreter.setting", "interpreter-setting.json"), ZEPPELIN_INTERPRETER_DIR("zeppelin.interpreter.dir", "interpreter"), ZEPPELIN_INTERPRETER_LOCALREPO("zeppelin.interpreter.localRepo", "local-repo"), @@ -552,7 +553,7 @@ public static enum ConfVars { ZEPPELIN_INTERPRETER_MAX_POOL_SIZE("zeppelin.interpreter.max.poolsize", 10), ZEPPELIN_INTERPRETER_GROUP_ORDER("zeppelin.interpreter.group.order", "spark,md,angular,sh," + "livy,alluxio,file,psql,flink,python,ignite,lens,cassandra,geode,kylin,elasticsearch," - + "scalding,jdbc,hbase,bigquery,beam,pig"), + + "scalding,jdbc,hbase,bigquery,beam,pig,scio"), ZEPPELIN_ENCODING("zeppelin.encoding", "UTF-8"), ZEPPELIN_NOTEBOOK_DIR("zeppelin.notebook.dir", "notebook"), // use specified notebook (id) as homescreen From 570cfaabf1b70ccaaa41816ba7d7dc808acb6e1f Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Wed, 28 Sep 2016 21:16:42 -0400 Subject: [PATCH 03/36] Add Scio interpreter tests --- .../zeppelin/scio/ScioInterpreterTest.java | 95 +++++++++++++++++++ scio/src/test/resources/log4j.properties | 22 +++++ 2 files changed, 117 insertions(+) create mode 100644 
scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java create mode 100644 scio/src/test/resources/log4j.properties diff --git a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java new file mode 100644 index 00000000000..07d5593c159 --- /dev/null +++ b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zeppelin.scio; + +import org.apache.zeppelin.display.AngularObjectRegistry; +import org.apache.zeppelin.display.GUI; +import org.apache.zeppelin.interpreter.*; +import org.apache.zeppelin.resource.LocalResourcePool; +import org.apache.zeppelin.user.AuthenticationInfo; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Properties; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class ScioInterpreterTest { + private static ScioInterpreter repl; + private static InterpreterGroup intpGroup; + private InterpreterContext context; + + private static Properties getScioTestProperties() { + Properties p = new Properties(); + //TODO: do we need some properties here? + return p; + } + + @Before + public void setUp() throws Exception { + if (repl == null) { + intpGroup = new InterpreterGroup(); + intpGroup.put("note", new LinkedList()); + repl = new ScioInterpreter(getScioTestProperties()); + repl.setInterpreterGroup(intpGroup); + intpGroup.get("note").add(repl); + repl.open(); + } + + context = new InterpreterContext("note", "id", "title", "text", + new AuthenticationInfo(), + new HashMap(), + new GUI(), + new AngularObjectRegistry(intpGroup.getId(), null), + new LocalResourcePool("id"), + new LinkedList(), + new InterpreterOutput(new InterpreterOutputListener() { + @Override + public void onAppend(InterpreterOutput out, byte[] line) { + } + + @Override + public void onUpdate(InterpreterOutput out, byte[] output) { + } + })); + } + + @Test + public void testBasicIntp() { + assertEquals(InterpreterResult.Code.SUCCESS, + repl.interpret("val a = 1\nval b = 2", context).code()); + + assertEquals(InterpreterResult.Code.ERROR, + repl.interpret("val a:Int = 'ds'", context).code()); + + InterpreterResult incomplete = repl.interpret("val a = \"\"\"", context); + 
assertEquals(InterpreterResult.Code.INCOMPLETE, incomplete.code()); + assertTrue(incomplete.message().length() > 0); + } + + @Test + public void testBasicPipeline() { + assertEquals(InterpreterResult.Code.SUCCESS, + repl.interpret("sc.parallelize(1 to 10).closeAndCollect().toList", context).code()); + } +} diff --git a/scio/src/test/resources/log4j.properties b/scio/src/test/resources/log4j.properties new file mode 100644 index 00000000000..8daee59d60d --- /dev/null +++ b/scio/src/test/resources/log4j.properties @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n From 6ff4e95c5478733c58430b46ca8a796aebe781a5 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 02:27:11 -0400 Subject: [PATCH 04/36] Inject argz to the Scio interpreter --- .../main/resources/interpreter-setting.json | 4 +-- .../zeppelin/scio/ScioInterpreter.scala | 29 +++++++++++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/scio/src/main/resources/interpreter-setting.json b/scio/src/main/resources/interpreter-setting.json index c98c32a786e..422e7cd3402 100644 --- a/scio/src/main/resources/interpreter-setting.json +++ b/scio/src/main/resources/interpreter-setting.json @@ -5,11 +5,11 @@ "className": "org.apache.zeppelin.scio.ScioInterpreter", "defaultInterpreter": true, "properties": { - "args": { + "argz": { "envName": null, "propertyName": null, "defaultValue": "--runner=InProcessPipelineRunner", - "description": "Scio commandline args" + "description": "Scio interpreter wide arguments" } }, "editor": { diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 67276b251d3..2b7a0434d48 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -17,10 +17,12 @@ package org.apache.zeppelin.scio +import java.beans.Introspector import java.io.PrintStream import java.util import java.util.Properties +import com.google.cloud.dataflow.sdk.options.{PipelineOptions, PipelineOptionsFactory} import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner import com.spotify.scio.repl.{ScioILoop, ScioReplClassLoader} import org.apache.zeppelin.interpreter.Interpreter.FormType @@ -41,7 +43,7 
@@ class ScioInterpreter(property: Properties) extends Interpreter(property) { val innerOut = new InterpreterOutputStream(logger) override def open(): Unit = { - val args: List[String] = Option(getProperty("args")) + val args: List[String] = Option(getProperty("argz")) .getOrElse(s"--runner=${classOf[InProcessPipelineRunner].getSimpleName}") .split(" ") .map(_.trim) @@ -90,7 +92,9 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { null, Thread.currentThread.getContextClassLoader) - REPL = new ScioILoop(scioClassLoader, args, None, new JPrintWriter(innerOut)) + val (dfArgs, _) = parseAndPartitionArgs(args) + + REPL = new ScioILoop(scioClassLoader, dfArgs, None, new JPrintWriter(innerOut)) scioClassLoader.setRepl(REPL) // Set classloader chain - expose top level abstract class loader down @@ -103,6 +107,27 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { REPL.settings_=(settings) REPL.createInterpreter() + REPL.interpret(s"""val argz = Array("${args.mkString("\", \"")}")""") + } + + private def parseAndPartitionArgs(args: List[String]): (List[String], List[String]) = { + import scala.collection.JavaConverters._ + // Extract --pattern of all registered derived types of PipelineOptions + val classes = PipelineOptionsFactory.getRegisteredOptions.asScala + classOf[PipelineOptions] + val optPatterns = classes.flatMap { cls => + cls.getMethods.flatMap { m => + val n = m.getName + if ((!n.startsWith("get") && !n.startsWith("is")) || + m.getParameterTypes.nonEmpty || m.getReturnType == classOf[Unit]) { + None + } else { + Some(Introspector.decapitalize(n.substring(if (n.startsWith("is")) 2 else 3))) + } + }.map(s => s"--$s($$|=)".r) + } + + // Split cmdlineArgs into 2 parts, optArgs for PipelineOptions and appArgs for Args + args.partition(arg => optPatterns.exists(_.findFirstIn(arg).isDefined)) } override def close(): Unit = { From 9a21aa0a641dc05685f096ff1524a7bc5caa640a Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla 
Date: Thu, 29 Sep 2016 14:23:36 -0400 Subject: [PATCH 05/36] Add display helpers --- .../zeppelin/scio/ScioInterpreter.scala | 7 +- .../org/apache/zeppelin/scio/package.scala | 120 ++++++++++++++++++ 2 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 scio/src/main/scala/org/apache/zeppelin/scio/package.scala diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 2b7a0434d48..b46ea06437e 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -43,7 +43,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { val innerOut = new InterpreterOutputStream(logger) override def open(): Unit = { - val args: List[String] = Option(getProperty("argz")) + val argz: List[String] = Option(getProperty("argz")) .getOrElse(s"--runner=${classOf[InProcessPipelineRunner].getSimpleName}") .split(" ") .map(_.trim) @@ -92,7 +92,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { null, Thread.currentThread.getContextClassLoader) - val (dfArgs, _) = parseAndPartitionArgs(args) + val (dfArgs, _) = parseAndPartitionArgs(argz) REPL = new ScioILoop(scioClassLoader, dfArgs, None, new JPrintWriter(innerOut)) scioClassLoader.setRepl(REPL) @@ -107,7 +107,8 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { REPL.settings_=(settings) REPL.createInterpreter() - REPL.interpret(s"""val argz = Array("${args.mkString("\", \"")}")""") + REPL.interpret(s"""val argz = Array("${argz.mkString("\", \"")}")""") + REPL.interpret("import org.apache.zeppelin.scio._") } private def parseAndPartitionArgs(args: List[String]): (List[String], List[String]) = { diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala new file mode 100644 index 
00000000000..c6802b384e5 --- /dev/null +++ b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin + +import com.spotify.scio.values.SCollection +import com.spotify.scio._ + +import scala.reflect.ClassTag + +package object scio { + private val SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" + private val maxResults = 1000 + + private def materialize[T: ClassTag](self: SCollection[T]) = { + val f = self.materialize + self.context.close() + f + } + + private def notifIfTruncated(it: Iterator[_]): Unit = { + if(it.hasNext) + println("\nResults are limited to " + maxResults + ".\n") + } + + // TODO: scala 2.11 + // implicit class ZeppelinSCollection[T: ClassTag](private val self: SCollection[T]) extends AnyVal { + implicit class ZeppelinSCollection[T: ClassTag](val self: SCollection[T]) + (implicit ev: T <:< AnyVal) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from SCollection. 
*/ + def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + println(s"""%table value\n${it.take(maxResults).map(printer).mkString("\n")}""") + notifIfTruncated(it) + } + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinSCollection[T: ClassTag](private val self: SCollection[T]) extends AnyVal { + implicit class ZeppelinStringSCollection[T: ClassTag](val self: SCollection[T]) + (implicit ev: T <:< String) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from SCollection. */ + def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + println(s"""%table value\n${it.take(maxResults).map(printer).mkString("\n")}""") + notifIfTruncated(it) + } + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinKVSCollection[K: ClassTag, V: ClassTag](val self: SCollection[(K, V)]) extends AnyVal { + implicit class ZeppelinKVSCollection[K: ClassTag, V: ClassTag](val self: SCollection[(K, V)]) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from KV SCollection. 
*/ + def closeAndDisplay(): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val content = it.take(maxResults).map{ case (k, v) => s"$k\t$v" }.mkString("\n") + println(s"""%table key\tvalue\n$content""") + notifIfTruncated(it) + } + } + + } + + // TODO: scala 2.11 + // implicit class ZeppelinProductSCollection[T: ClassTag](val self: SCollection[T])(implicit ev: T <:< Product) extends AnyVal { + implicit class ZeppelinProductSCollection[T: ClassTag](val self: SCollection[T]) + (implicit ev: T <:< Product) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from Product like SCollection */ + def closeAndDisplay(): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val first = it.next() + //TODO is this safe field name to value iterator? + val fieldNames = first.getClass.getDeclaredFields.map(_.getName) + + val header = fieldNames.mkString("\t") + val firstStr = first.productIterator.mkString("\t") + val content = it.take(maxResults).map(_.productIterator.mkString("\t")).mkString("\n") + println(s"""%table $header\n$firstStr\n$content""") + notifIfTruncated(it) + } + } + } + +} From 89a281185363a465bd49006927f3efbabb217bc2 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 17:34:41 -0400 Subject: [PATCH 06/36] Add limit of display --- scio/src/main/resources/interpreter-setting.json | 6 ++++++ scio/src/main/scala/org/apache/zeppelin/scio/package.scala | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scio/src/main/resources/interpreter-setting.json b/scio/src/main/resources/interpreter-setting.json index 422e7cd3402..ce939e54fe3 100644 --- a/scio/src/main/resources/interpreter-setting.json +++ b/scio/src/main/resources/interpreter-setting.json @@ -10,6 +10,12 @@ "propertyName": null, "defaultValue": 
"--runner=InProcessPipelineRunner", "description": "Scio interpreter wide arguments" + }, + "zeppelin.scio.maxResult": { + "envName": "ZEPPELIN_SCIO_MAXRESULT", + "propertyName": "zeppelin.scio.maxResult", + "defaultValue": "1000", + "description": "Max number of SCollection result to display." } }, "editor": { diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala index c6802b384e5..f920e5aa222 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala @@ -24,7 +24,7 @@ import scala.reflect.ClassTag package object scio { private val SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" - private val maxResults = 1000 + private val maxResults = Integer.getInteger("zeppelin.scio.maxResult", 1000) private def materialize[T: ClassTag](self: SCollection[T]) = { val f = self.materialize @@ -34,7 +34,7 @@ package object scio { private def notifIfTruncated(it: Iterator[_]): Unit = { if(it.hasNext) - println("\nResults are limited to " + maxResults + ".\n") + println("\nResults are limited to " + maxResults + " rows.\n") } // TODO: scala 2.11 From be252f81cc21d3cce8911474a4683cfd7d5be1fa Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 18:31:06 -0400 Subject: [PATCH 07/36] Add avro display --- .../org/apache/zeppelin/scio/package.scala | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala index f920e5aa222..b51559768a0 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala @@ -17,8 +17,10 @@ package org.apache.zeppelin -import com.spotify.scio.values.SCollection import com.spotify.scio._ +import com.spotify.scio.values.SCollection +import org.apache.avro.Schema 
+import org.apache.avro.generic.GenericRecord import scala.reflect.ClassTag @@ -117,4 +119,31 @@ package object scio { } } + // TODO: scala 2.11 + // implicit class ZeppelinAvroSCollection[T: ClassTag](val self: SCollection[T])(implicit ev: T <:< GenericRecord) extends AnyVal { + implicit class ZeppelinAvroSCollection[T: ClassTag](val self: SCollection[T]) + (implicit ev: T <:< GenericRecord) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from Avro like SCollection */ + def closeAndDisplay(schema: Schema = null): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val first = it.next() + import collection.JavaConverters._ + val fieldNames = first.getSchema.getFields.iterator.asScala.map(_.name()).toArray + + val header = fieldNames.mkString("\t") + val firstStr = fieldNames.map(first.get(_)).mkString("\t") + val content = it.take(maxResults) + .map(r => fieldNames.map(r.get(_)).mkString("\t")) + .mkString("\n") + println(s"""%table $header\n$firstStr\n$content""") + notifIfTruncated(it) + } + } + } + } From 61850d7f65ffa791c8c176b1511af9e573189536 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 20:54:47 -0400 Subject: [PATCH 08/36] Add TableRow display helper + style --- .../main/resources/interpreter-setting.json | 2 +- .../org/apache/zeppelin/scio/package.scala | 66 ++++++++++++++----- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/scio/src/main/resources/interpreter-setting.json b/scio/src/main/resources/interpreter-setting.json index ce939e54fe3..eeacbf4bd95 100644 --- a/scio/src/main/resources/interpreter-setting.json +++ b/scio/src/main/resources/interpreter-setting.json @@ -15,7 +15,7 @@ "envName": "ZEPPELIN_SCIO_MAXRESULT", "propertyName": "zeppelin.scio.maxResult", "defaultValue": "1000", - "description": "Max number of SCollection result to display." 
+ "description": "Max number of SCollection results to display." } }, "editor": { diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala index b51559768a0..c6463fb2e9f 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/package.scala @@ -17,7 +17,9 @@ package org.apache.zeppelin +import com.google.api.services.bigquery.model.TableSchema import com.spotify.scio._ +import com.spotify.scio.bigquery.TableRow import com.spotify.scio.values.SCollection import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord @@ -27,6 +29,9 @@ import scala.reflect.ClassTag package object scio { private val SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" private val maxResults = Integer.getInteger("zeppelin.scio.maxResult", 1000) + private val tab = "\t" + private val newline = "\n" + private val table = "%table" private def materialize[T: ClassTag](self: SCollection[T]) = { val f = self.materialize @@ -36,7 +41,7 @@ package object scio { private def notifIfTruncated(it: Iterator[_]): Unit = { if(it.hasNext) - println("\nResults are limited to " + maxResults + " rows.\n") + println(s"$newlineResults are limited to " + maxResults + s" rows.$newline") } // TODO: scala 2.11 @@ -51,7 +56,7 @@ package object scio { if (it.isEmpty) { println(SCollectionEmptyMsg) } else { - println(s"""%table value\n${it.take(maxResults).map(printer).mkString("\n")}""") + println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") notifIfTruncated(it) } } @@ -69,7 +74,7 @@ package object scio { if (it.isEmpty) { println(SCollectionEmptyMsg) } else { - println(s"""%table value\n${it.take(maxResults).map(printer).mkString("\n")}""") + println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") notifIfTruncated(it) } } @@ -86,8 +91,8 @@ package object scio { if (it.isEmpty) { 
println(SCollectionEmptyMsg) } else { - val content = it.take(maxResults).map{ case (k, v) => s"$k\t$v" }.mkString("\n") - println(s"""%table key\tvalue\n$content""") + val content = it.take(maxResults).map{ case (k, v) => s"$k$tab$v" }.mkString(newline) + println(s"$table key${tab}value$newline$content") notifIfTruncated(it) } } @@ -110,10 +115,10 @@ package object scio { //TODO is this safe field name to value iterator? val fieldNames = first.getClass.getDeclaredFields.map(_.getName) - val header = fieldNames.mkString("\t") - val firstStr = first.productIterator.mkString("\t") - val content = it.take(maxResults).map(_.productIterator.mkString("\t")).mkString("\n") - println(s"""%table $header\n$firstStr\n$content""") + val header = fieldNames.mkString(tab) + val firstStr = first.productIterator.mkString(tab) + val content = it.take(maxResults).map(_.productIterator.mkString(tab)).mkString(newline) + println(s"$table $header$newline$firstStr$newline$content") notifIfTruncated(it) } } @@ -133,14 +138,45 @@ package object scio { } else { val first = it.next() import collection.JavaConverters._ - val fieldNames = first.getSchema.getFields.iterator.asScala.map(_.name()).toArray - val header = fieldNames.mkString("\t") - val firstStr = fieldNames.map(first.get(_)).mkString("\t") + val fieldNames = if (schema != null) { + schema.getFields.iterator().asScala.map(_.name()).toArray + } else { + first.getSchema.getFields.iterator.asScala.map(_.name()).toArray + } + + val header = fieldNames.mkString(tab) + val firstStr = fieldNames.map(first.get).mkString(tab) val content = it.take(maxResults) - .map(r => fieldNames.map(r.get(_)).mkString("\t")) - .mkString("\n") - println(s"""%table $header\n$firstStr\n$content""") + .map(r => fieldNames.map(r.get).mkString(tab)) + .mkString(newline) + println(s"$table $header$newline$firstStr$newline$content") + notifIfTruncated(it) + } + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinBQTableSCollection[T: ClassTag](val self: 
SCollection[T])(implicit ev: T <:< TableRow) extends AnyVal { + implicit class ZeppelinBQTableSCollection[T: ClassTag](val self: SCollection[T]) + (implicit ev: T <:< TableRow) { + /** Convenience method to close the current [[com.spotify.scio.ScioContext]] + * and display elements from TableRow like SCollection */ + def closeAndDisplay(schema: TableSchema): Unit = { + val it = materialize(self).waitForResult().value + + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + import collection.JavaConverters._ + val fields = schema.getFields.asScala.map(_.getName).toArray + val header = fields.mkString(tab) + + val content = it.take(maxResults) + .map(r => fields.map(r.get).mkString(tab)) + .mkString(newline) + + println(s"$table $header$newline$content") notifIfTruncated(it) } } From 0a3b49a50a40b1944f1666884afcaa4d97a9fc5b Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 21:18:57 -0400 Subject: [PATCH 09/36] Add Scio doc --- docs/interpreter/scio.md | 151 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 docs/interpreter/scio.md diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md new file mode 100644 index 00000000000..157a74e02bd --- /dev/null +++ b/docs/interpreter/scio.md @@ -0,0 +1,151 @@ +--- +layout: page +title: "Scio Interpreter for Apache Zeppelin" +description: "Scio is a Scala DSL for Apache Beam/Google Dataflow model." +group: interpreter +--- + +{% include JB/setup %} + +# Scio Interpreter for Apache Zeppelin + +
+ +## Overview +Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPlatform/DataflowJavaSDK) and [Apache Beam](http://beam.incubator.apache.org/) inspired by [Spark](http://spark.apache.org/) and [Scalding](https://github.com/twitter/scalding). See the current [wiki](https://github.com/spotify/scio/wiki) and [API documentation](http://spotify.github.io/scio/) for more information. + +## Configuration + + + + + + + + + + + + + + + + + +
| Name | Default Value | Description |
| --- | --- | --- |
| argz | --runner=InProcessPipelineRunner | Scio Pipeline runner |
| zeppelin.scio.maxResult | 1000 | Max number of SCollection results to display |
+ +## Enabling the Scio Interpreter + +In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **scio**. + +## Using the Scio Interpreter + +In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as the vanilla Scala REPL and the [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). Context is shared among all *Scio* paragraphs. There is a special variable **argz** which holds the arguments from the Scio interpreter settings. The easiest way to proceed is to create a context via the standard `ContextAndArgs`. + +```scala +%scio +val (sc, args) = ContextAndArgs(argz) +``` + +Use the `sc` context the way you would in a regular pipeline/REPL. + +Example: + +```scala +%scio +val (sc, args) = ContextAndArgs(argz) +sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() +``` + +Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. + +### Progress + +There can be only one paragraph running at a time. There is no notion of overall progress - the progress indicator will state `42`, which is just a magic number. + +### SCollection display helpers + +Scio interpreter comes with display helpers to ease working with Zeppelin notebooks. Simply use `closeAndDisplay()` on `SCollection` to close context and display the results. The number of results is limited by `zeppelin.scio.maxResult` (by default 1000). 
+ +Supported `SCollection` types: + * Scio's typed BigQuery + * Scala's case classes + * Scala's tuples + * Google BigQuery's TableRow + * Apache Avro + * All Scala's `Product`-like data + * All Scala's `AnyVal` + +#### BigQuery example: + +```scala +%scio +@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays + |FROM [bigquery-samples:airline_ontime_data.flights] + |group by departure_airport + |order by 2 desc + |limit 10""".stripMargin) class Flights + +val (sc, args) = ContextAndArgs(argz) +sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema) +``` + +#### BigQuery typed example: + +```scala +%scio +@BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays + |FROM [bigquery-samples:airline_ontime_data.flights] + |group by departure_airport + |order by 2 desc + |limit 10""".stripMargin) class Flights + +val (sc, args) = ContextAndArgs(argz) +sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay() +``` + +#### Avro example: + +```scala +%scio +import com.spotify.data.ExampleAvro + +val (sc, args) = ContextAndArgs(argz) +sc.avroFile[ExampleAvro]("gs://<bucket>/tmp/my.avro").take(10).closeAndDisplay() +``` + +#### Avro example with a view schema: + +```scala +%scio +import com.spotify.data.ExampleAvro +import org.apache.avro.Schema + +val (sc, args) = ContextAndArgs(argz) +val view = Schema.parse("""{"type":"record","name":"ExampleAvro","namespace":"com.spotify.data","fields":[{"name":"track","type":"string"}, {"name":"artist", "type":"string"}]}""") + +sc.avroFile[ExampleAvro]("gs://<bucket>/tmp/my.avro").take(10).closeAndDisplay(view) +``` + +### Google credentials + +Scio Interpreter will try to infer your Google Cloud credentials from its environment; it takes into account: + * `argz` interpreter settings ([doc](https://github.com/spotify/scio/wiki#options)) + * environment variable 
(`GOOGLE_APPLICATION_CREDENTIALS`) + * gcloud configuration + +#### BigQuery macro credentials + +Currently BigQuery project for macro expansion is inferred using Google Dataflow's [DefaultProjectFactory().create()](https://github.com/GoogleCloudPlatform/DataflowJavaSDK/blob/master/sdk/src/main/java/com/google/cloud/dataflow/sdk/options/GcpOptions.java#L187) From 93233a8f8be033c4e7ca974e1626f7af7e75a187 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 21:33:44 -0400 Subject: [PATCH 10/36] Rename argz param, fix style --- docs/interpreter/scio.md | 2 +- .../main/resources/interpreter-setting.json | 6 ++-- .../zeppelin/scio/ScioInterpreter.scala | 36 ++++++++++++------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index 157a74e02bd..102f2997c89 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -34,7 +34,7 @@ Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPl Description - argz + zeppelin.scio.argz --runner=InProcessPipelineRunner Scio Pipeline runner diff --git a/scio/src/main/resources/interpreter-setting.json b/scio/src/main/resources/interpreter-setting.json index eeacbf4bd95..80b60772388 100644 --- a/scio/src/main/resources/interpreter-setting.json +++ b/scio/src/main/resources/interpreter-setting.json @@ -5,9 +5,9 @@ "className": "org.apache.zeppelin.scio.ScioInterpreter", "defaultInterpreter": true, "properties": { - "argz": { - "envName": null, - "propertyName": null, + "zeppelin.scio.argz": { + "envName": "ZEPPELIN_SCIO_ARGZ", + "propertyName": "zeppelin.scio.argz", "defaultValue": "--runner=InProcessPipelineRunner", "description": "Scio interpreter wide arguments" }, diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index b46ea06437e..5a400a0abb7 100644 --- 
a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -36,6 +36,25 @@ import scala.tools.nsc.Settings import scala.tools.nsc.interpreter.JPrintWriter import scala.tools.nsc.util.ClassPath +/** + * Scio interpreter for Zeppelin. + * + *
    + *
  • {@code zeppelin.scio.argz} - Scio interpreter wide arguments
  • + *
  • {@code zeppelin.scio.maxResult} - Max number of SCollection results to display.
  • + *
+ * + *

+ * How to use:
+ * {@code + * %scio + * val (sc, args) = ContextAndArgs(argz) + * sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() + * } + *

+ * + */ + class ScioInterpreter(property: Properties) extends Interpreter(property) { private val logger = LoggerFactory.getLogger(classOf[ScioInterpreter]) private var REPL: ScioILoop = null @@ -43,7 +62,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { val innerOut = new InterpreterOutputStream(logger) override def open(): Unit = { - val argz: List[String] = Option(getProperty("argz")) + val argz: List[String] = Option(getProperty("zeppelin.scio.argz")) .getOrElse(s"--runner=${classOf[InProcessPipelineRunner].getSimpleName}") .split(" ") .map(_.trim) @@ -75,9 +94,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { // There should be no harm if we keep this for sbt launch. val thisJar = this.getClass.getProtectionDomain.getCodeSource.getLocation.getPath // In some cases this may be `target/classes` - if(thisJar.endsWith(".jar")) { - settings.plugin.appendToValue(thisJar) - } + if(thisJar.endsWith(".jar")) settings.plugin.appendToValue(thisJar) ClassPath.split(settings.classpath.value) .find(File(_).name.startsWith("paradise_")) @@ -119,11 +136,8 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { cls.getMethods.flatMap { m => val n = m.getName if ((!n.startsWith("get") && !n.startsWith("is")) || - m.getParameterTypes.nonEmpty || m.getReturnType == classOf[Unit]) { - None - } else { - Some(Introspector.decapitalize(n.substring(if (n.startsWith("is")) 2 else 3))) - } + m.getParameterTypes.nonEmpty || m.getReturnType == classOf[Unit]) None + else Some(Introspector.decapitalize(n.substring(if (n.startsWith("is")) 2 else 3))) }.map(s => s"--$s($$|=)".r) } @@ -174,9 +188,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { // not implemented } - override def getFormType: FormType = { - FormType.NATIVE - } + override def getFormType: FormType = FormType.NATIVE override def getProgress(context: InterpreterContext): Int = { // not implemented From 
12f0096963777153d586f55e076da75e60cc4c4f Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 22:00:06 -0400 Subject: [PATCH 11/36] Clean up tests --- .../zeppelin/scio/ScioInterpreterTest.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java index 07d5593c159..6aad4cc95a7 100644 --- a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java +++ b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java @@ -24,8 +24,6 @@ import org.apache.zeppelin.user.AuthenticationInfo; import org.junit.Before; import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.HashMap; import java.util.LinkedList; @@ -39,18 +37,12 @@ public class ScioInterpreterTest { private static InterpreterGroup intpGroup; private InterpreterContext context; - private static Properties getScioTestProperties() { - Properties p = new Properties(); - //TODO: do we need some properties here? 
- return p; - } - @Before public void setUp() throws Exception { if (repl == null) { intpGroup = new InterpreterGroup(); intpGroup.put("note", new LinkedList()); - repl = new ScioInterpreter(getScioTestProperties()); + repl = new ScioInterpreter(new Properties()); repl.setInterpreterGroup(intpGroup); intpGroup.get("note").add(repl); repl.open(); @@ -75,13 +67,19 @@ public void onUpdate(InterpreterOutput out, byte[] output) { } @Test - public void testBasicIntp() { + public void testBasicSuccess() { assertEquals(InterpreterResult.Code.SUCCESS, repl.interpret("val a = 1\nval b = 2", context).code()); + } + @Test + public void testBasicSyntaxError() { assertEquals(InterpreterResult.Code.ERROR, repl.interpret("val a:Int = 'ds'", context).code()); + } + @Test + public void testBasicIncomplete() { InterpreterResult incomplete = repl.interpret("val a = \"\"\"", context); assertEquals(InterpreterResult.Code.INCOMPLETE, incomplete.code()); assertTrue(incomplete.message().length() > 0); From 7b54e49d0480124b86b5aab33dc24821a316f102 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 29 Sep 2016 22:01:55 -0400 Subject: [PATCH 12/36] No need to override delegation for completion --- .../scala/org/apache/zeppelin/scio/ScioInterpreter.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 5a400a0abb7..0d21c27da59 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -195,9 +195,4 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { 42 } - override def completion(buf: String, cursor: Int): util.List[InterpreterCompletion] = { - //TODO: implement, delegate? 
- super.completion(buf, cursor) - } - } From 99a7daa6ce92d198e20d7197e1dcb0e3055685c4 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 30 Sep 2016 00:10:28 -0400 Subject: [PATCH 13/36] Progress should be 0 --- docs/interpreter/scio.md | 2 +- .../main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index 102f2997c89..b12c7c91303 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -73,7 +73,7 @@ Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more compl ### Progress -There can be only one paragraph running at a time. There is no notion of overall progress - the progress indicator will state `42`, which is just a magic number. +There can be only one paragraph running at a time. There is no notion of overall progress, thus progress bar will be `0`. ### SCollection display helpers diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 0d21c27da59..e532ce61c22 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -192,7 +192,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { override def getProgress(context: InterpreterContext): Int = { // not implemented - 42 + 0 } } From b884b7245dfe364140e91253e11539b693ff870b Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 30 Sep 2016 01:27:42 -0400 Subject: [PATCH 14/36] Fix tests - add exception and check messages --- .../zeppelin/scio/ScioInterpreter.scala | 2 +- .../zeppelin/scio/ScioInterpreterTest.java | 65 +++++++++++++------ 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala 
b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index e532ce61c22..ec6dc69ee7f 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -166,7 +166,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { } case Error => { logger.error(s"Error executing `$code` in $paragraphId") - new InterpreterResult(InterpreterResult.Code.ERROR) + new InterpreterResult(InterpreterResult.Code.ERROR, "Interpreter error") } case Incomplete => { logger.warn(s"Code `$code` not complete in $paragraphId") diff --git a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java index 6aad4cc95a7..00a036dcacd 100644 --- a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java +++ b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java @@ -37,18 +37,10 @@ public class ScioInterpreterTest { private static InterpreterGroup intpGroup; private InterpreterContext context; - @Before - public void setUp() throws Exception { - if (repl == null) { - intpGroup = new InterpreterGroup(); - intpGroup.put("note", new LinkedList()); - repl = new ScioInterpreter(new Properties()); - repl.setInterpreterGroup(intpGroup); - intpGroup.get("note").add(repl); - repl.open(); - } + private final String newline = "\n"; - context = new InterpreterContext("note", "id", "title", "text", + private InterpreterContext getNewContext() { + return new InterpreterContext("note", "id", "title", "text", new AuthenticationInfo(), new HashMap(), new GUI(), @@ -57,25 +49,37 @@ public void setUp() throws Exception { new LinkedList(), new InterpreterOutput(new InterpreterOutputListener() { @Override - public void onAppend(InterpreterOutput out, byte[] line) { - } - + public void onAppend(InterpreterOutput out, byte[] line) {} @Override - public void onUpdate(InterpreterOutput out, 
byte[] output) { - } + public void onUpdate(InterpreterOutput out, byte[] output) {} })); } + @Before + public void setUp() throws Exception { + if (repl == null) { + intpGroup = new InterpreterGroup(); + intpGroup.put("note", new LinkedList()); + repl = new ScioInterpreter(new Properties()); + repl.setInterpreterGroup(intpGroup); + intpGroup.get("note").add(repl); + repl.open(); + } + + context = getNewContext(); + } + @Test public void testBasicSuccess() { assertEquals(InterpreterResult.Code.SUCCESS, - repl.interpret("val a = 1\nval b = 2", context).code()); + repl.interpret("val a = 1" + newline + "val b = 2", context).code()); } @Test public void testBasicSyntaxError() { - assertEquals(InterpreterResult.Code.ERROR, - repl.interpret("val a:Int = 'ds'", context).code()); + InterpreterResult error = repl.interpret("val a:Int = 'ds'", context); + assertEquals(InterpreterResult.Code.ERROR, error.code()); + assertTrue(error.message().length() > 0); } @Test @@ -88,6 +92,27 @@ public void testBasicIncomplete() { @Test public void testBasicPipeline() { assertEquals(InterpreterResult.Code.SUCCESS, - repl.interpret("sc.parallelize(1 to 10).closeAndCollect().toList", context).code()); + repl.interpret("val (sc, _) = ContextAndArgs(argz)" + newline + + "sc.parallelize(1 to 10).closeAndCollect().toList", context).code()); } + + @Test + public void testBasicMultiStepPipeline() { + final StringBuilder code = new StringBuilder(); + code.append("val (sc, _) = ContextAndArgs(argz)").append(newline) + .append("val numbers = sc.parallelize(1 to 10)").append(newline) + .append("val results = numbers.closeAndCollect().toList").append(newline) + .append("println(results)"); + assertEquals(InterpreterResult.Code.SUCCESS, + repl.interpret(code.toString(), context).code()); + } + + @Test + public void testException() { + InterpreterResult exception = repl.interpret("val (sc, _) = ContextAndArgs(argz)" + newline + + "throw new Exception(\"test\")", context); + 
assertEquals(InterpreterResult.Code.ERROR, exception.code()); + assertTrue(exception.message().length() > 0); + } + } From a92494bd51eff5a3184bd3f7f98ff92533c05cd6 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 30 Sep 2016 11:45:45 -0400 Subject: [PATCH 15/36] Style + use `split` to support both scala 2.{10,11} Compilation on Zeppelin travis fails on ClassPath missing method on scala 2.11. Use split and build URLs manually. --- .../zeppelin/scio/ScioInterpreter.scala | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index ec6dc69ee7f..debe2a5a85a 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -19,14 +19,13 @@ package org.apache.zeppelin.scio import java.beans.Introspector import java.io.PrintStream -import java.util +import java.net.URL import java.util.Properties import com.google.cloud.dataflow.sdk.options.{PipelineOptions, PipelineOptionsFactory} import com.google.cloud.dataflow.sdk.runners.inprocess.InProcessPipelineRunner import com.spotify.scio.repl.{ScioILoop, ScioReplClassLoader} import org.apache.zeppelin.interpreter.Interpreter.FormType -import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion import org.apache.zeppelin.interpreter.util.InterpreterOutputStream import org.apache.zeppelin.interpreter.{Interpreter, InterpreterContext, InterpreterResult} import org.slf4j.LoggerFactory @@ -70,11 +69,8 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { val settings = new Settings() - // For scala 2.10 - usejavacp - if (scala.util.Properties.versionString.contains("2.10.")) { - settings.classpath.append(System.getProperty("java.class.path")) - settings.usejavacp.value = true - } + 
settings.classpath.append(System.getProperty("java.class.path")) + settings.usejavacp.value = true def classLoaderURLs(cl: ClassLoader): Array[java.net.URL] = cl match { case null => Array() @@ -90,22 +86,24 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { // itself to -Xplugin. If shell is started from sbt or classpath, paradise jar has to be in // classpath, we find it and add it to -Xplugin. - // Repl assembly includes paradise's scalac-plugin.xml - required for BigQuery macro - // There should be no harm if we keep this for sbt launch. val thisJar = this.getClass.getProtectionDomain.getCodeSource.getLocation.getPath // In some cases this may be `target/classes` if(thisJar.endsWith(".jar")) settings.plugin.appendToValue(thisJar) - ClassPath.split(settings.classpath.value) + ClassPath + .split(settings.classpath.value) .find(File(_).name.startsWith("paradise_")) .foreach(settings.plugin.appendToValue) // Force the repl to be synchronous, so all cmds are executed in the same thread settings.Yreplsync.value = true + val jars = ClassPath.split(settings.classpath.value) + .flatMap(ClassPath.specToURL) + .toArray + val scioClassLoader = new ScioReplClassLoader( - ClassPath.toURLs(settings.classpath.value).toArray ++ - classLoaderURLs(Thread.currentThread().getContextClassLoader), + jars ++ classLoaderURLs(Thread.currentThread().getContextClassLoader), null, Thread.currentThread.getContextClassLoader) From 0305a3c4a143adce7cfb2f11602207c40105dc46 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 30 Sep 2016 21:39:38 -0400 Subject: [PATCH 16/36] Style fix --- .../main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index debe2a5a85a..187fcd90ba0 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ 
b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -118,7 +118,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { settings.embeddedDefaults(scioClassLoader) // No need for bigquery dumps - sys.props("bigquery.plugin.disable.dump") = "true" + sys.props("bigquery.plugin.disable.dump") = true.toString REPL.settings_=(settings) REPL.createInterpreter() From 4014c817d590ba629f95f85bcc00e579f62444ff Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Sat, 1 Oct 2016 01:52:25 -0400 Subject: [PATCH 17/36] Parse params generic params --- .../org/apache/zeppelin/scio/ScioInterpreter.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 187fcd90ba0..a2ca20c4c0e 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -18,8 +18,7 @@ package org.apache.zeppelin.scio import java.beans.Introspector -import java.io.PrintStream -import java.net.URL +import java.io.PrintStream} import java.util.Properties import com.google.cloud.dataflow.sdk.options.{PipelineOptions, PipelineOptionsFactory} @@ -31,7 +30,7 @@ import org.apache.zeppelin.interpreter.{Interpreter, InterpreterContext, Interpr import org.slf4j.LoggerFactory import scala.reflect.io.File -import scala.tools.nsc.Settings +import scala.tools.nsc.GenericRunnerCommand import scala.tools.nsc.interpreter.JPrintWriter import scala.tools.nsc.util.ClassPath @@ -67,7 +66,10 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { .map(_.trim) .toList - val settings = new Settings() + // Process command line arguments into a settings object, and use that to start the REPL. 
+ // We ignore params we don't care about - hence error function is empty + val command = new GenericRunnerCommand(argz, _ => ()) + val settings = command.settings settings.classpath.append(System.getProperty("java.class.path")) settings.usejavacp.value = true From 9dcc8cef33ed3e6b1fba9b84ad72ed1db13cf839 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Sat, 1 Oct 2016 14:23:46 -0400 Subject: [PATCH 18/36] Add display helpers for Tap[T] and Future[Tap[T]] --- .../apache/zeppelin/scio/DisplayHelpers.scala | 149 +++++++++++++++++ ...cala => DisplaySCollectionImplicits.scala} | 102 ++---------- .../zeppelin/scio/DisplayTapImplicits.scala | 154 ++++++++++++++++++ .../zeppelin/scio/ScioInterpreter.scala | 5 +- 4 files changed, 320 insertions(+), 90 deletions(-) create mode 100644 scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala rename scio/src/main/scala/org/apache/zeppelin/scio/{package.scala => DisplaySCollectionImplicits.scala} (58%) create mode 100644 scio/src/main/scala/org/apache/zeppelin/scio/DisplayTapImplicits.scala diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala new file mode 100644 index 00000000000..ea5195d0b5d --- /dev/null +++ b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +import com.google.api.services.bigquery.model.TableSchema +import com.spotify.scio.bigquery._ +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord + +import scala.reflect.ClassTag + +/** + * Set of helpers for Zeppelin Display system. + */ +private[scio] object DisplayHelpers { + + private val SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" + private val maxResults = Integer.getInteger("zeppelin.scio.maxResult", 1000) + private val tab = "\t" + private val newline = "\n" + private val table = "%table" + + private def notifyIfTruncated(it: Iterator[_]): Unit = { + if(it.hasNext) + println(s"$newlineResults are limited to " + maxResults + s" rows.$newline") + } + + /** + * Displays [[AnyVal]] values from given [[Iterator]]. + */ + private[scio] def displayAnyVal[T: ClassTag](it: Iterator[T], printer: (T) => String): Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") + notifyIfTruncated(it) + } + } + + /** + * Displays [[String]] values from given [[Iterator]]. + */ + private[scio] def displayString[T: ClassTag](it: Iterator[T], printer: (T) => String): Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") + notifyIfTruncated(it) + } + } + + /** + * Displays [[com.google.cloud.dataflow.sdk.values.KV]] values from given [[Iterator]]. 
+ */ + private[scio] def displayKV[K: ClassTag, V: ClassTag](it: Iterator[(K,V)]): Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val content = it.take(maxResults).map{ case (k, v) => s"$k$tab$v" }.mkString(newline) + println(s"$table key${tab}value$newline$content") + notifyIfTruncated(it) + } + } + + /** + * Displays [[Product]] values from given [[Iterator]]. + */ + private[scio] def displayProduct[T: ClassTag](it: Iterator[T]) + (implicit ev: T <:< Product): Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val first = it.next() + //TODO is this safe field name to value iterator? + val fieldNames = first.getClass.getDeclaredFields.map(_.getName) + + val header = fieldNames.mkString(tab) + val firstStr = first.productIterator.mkString(tab) + val content = it.take(maxResults).map(_.productIterator.mkString(tab)).mkString(newline) + println(s"$table $header$newline$firstStr$newline$content") + notifyIfTruncated(it) + } + } + + /** + * Displays Avro values from given [[Iterator]] using optional [[Schema]]. 
+ * @param schema optional "view" schema, otherwise schema is inferred from the first object + */ + private[scio] def displayAvro[T: ClassTag](it: Iterator[T], schema: Schema = null) + (implicit ev: T <:< GenericRecord): Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + val first = it.next() + import collection.JavaConverters._ + + val fieldNames = if (schema != null) { + schema.getFields.iterator().asScala.map(_.name()).toArray + } else { + first.getSchema.getFields.iterator.asScala.map(_.name()).toArray + } + + val header = fieldNames.mkString(tab) + val firstStr = fieldNames.map(first.get).mkString(tab) + val content = it.take(maxResults) + .map(r => fieldNames.map(r.get).mkString(tab)) + .mkString(newline) + println(s"$table $header$newline$firstStr$newline$content") + notifyIfTruncated(it) + } + } + + /** + * Displays [[TableRow]] values from given [[Iterator]] using specified [[TableSchema]]. + */ + private[scio] def displayBQTableRow[T: ClassTag](it: Iterator[T], schema: TableSchema) + (implicit ev: T <:< TableRow) : Unit = { + if (it.isEmpty) { + println(SCollectionEmptyMsg) + } else { + import collection.JavaConverters._ + val fields = schema.getFields.asScala.map(_.getName).toArray + val header = fields.mkString(tab) + + val content = it.take(maxResults) + .map(r => fields.map(r.get).mkString(tab)) + .mkString(newline) + + println(s"$table $header$newline$content") + notifyIfTruncated(it) + } + } + +} diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala b/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala similarity index 58% rename from scio/src/main/scala/org/apache/zeppelin/scio/package.scala rename to scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala index c6463fb2e9f..f7421dc41b7 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/package.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala @@ -15,23 +15,21 @@ 
* limitations under the License. */ -package org.apache.zeppelin +package org.apache.zeppelin.scio import com.google.api.services.bigquery.model.TableSchema import com.spotify.scio._ -import com.spotify.scio.bigquery.TableRow +import com.spotify.scio.bigquery._ import com.spotify.scio.values.SCollection import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scala.reflect.ClassTag -package object scio { - private val SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" - private val maxResults = Integer.getInteger("zeppelin.scio.maxResult", 1000) - private val tab = "\t" - private val newline = "\n" - private val table = "%table" +/** + * Implicit Zeppelin display helpers for SCollection. + */ +object DisplaySCollectionImplicits { private def materialize[T: ClassTag](self: SCollection[T]) = { val f = self.materialize @@ -39,26 +37,15 @@ package object scio { f } - private def notifIfTruncated(it: Iterator[_]): Unit = { - if(it.hasNext) - println(s"$newlineResults are limited to " + maxResults + s" rows.$newline") - } - // TODO: scala 2.11 - // implicit class ZeppelinSCollection[T: ClassTag](private val self: SCollection[T]) extends AnyVal { + // implicit class ZeppelinSCollection[T: ClassTag](private val self: SCollection[T])(implicit ev: T <:< AnyVal) extends AnyVal { implicit class ZeppelinSCollection[T: ClassTag](val self: SCollection[T]) (implicit ev: T <:< AnyVal) { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from SCollection. */ def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") - notifIfTruncated(it) - } + DisplayHelpers.displayAnyVal(it, printer) } } @@ -70,13 +57,7 @@ package object scio { * and display elements from SCollection. 
*/ def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") - notifIfTruncated(it) - } + DisplayHelpers.displayString(it, printer) } } @@ -87,16 +68,8 @@ package object scio { * and display elements from KV SCollection. */ def closeAndDisplay(): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - val content = it.take(maxResults).map{ case (k, v) => s"$k$tab$v" }.mkString(newline) - println(s"$table key${tab}value$newline$content") - notifIfTruncated(it) - } + DisplayHelpers.displayKV(it) } - } // TODO: scala 2.11 @@ -107,20 +80,7 @@ package object scio { * and display elements from Product like SCollection */ def closeAndDisplay(): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - val first = it.next() - //TODO is this safe field name to value iterator? 
- val fieldNames = first.getClass.getDeclaredFields.map(_.getName) - - val header = fieldNames.mkString(tab) - val firstStr = first.productIterator.mkString(tab) - val content = it.take(maxResults).map(_.productIterator.mkString(tab)).mkString(newline) - println(s"$table $header$newline$firstStr$newline$content") - notifIfTruncated(it) - } + DisplayHelpers.displayProduct(it) } } @@ -132,53 +92,19 @@ package object scio { * and display elements from Avro like SCollection */ def closeAndDisplay(schema: Schema = null): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - val first = it.next() - import collection.JavaConverters._ - - val fieldNames = if (schema != null) { - schema.getFields.iterator().asScala.map(_.name()).toArray - } else { - first.getSchema.getFields.iterator.asScala.map(_.name()).toArray - } - - val header = fieldNames.mkString(tab) - val firstStr = fieldNames.map(first.get).mkString(tab) - val content = it.take(maxResults) - .map(r => fieldNames.map(r.get).mkString(tab)) - .mkString(newline) - println(s"$table $header$newline$firstStr$newline$content") - notifIfTruncated(it) - } + DisplayHelpers.displayAvro(it, schema) } } // TODO: scala 2.11 // implicit class ZeppelinBQTableSCollection[T: ClassTag](val self: SCollection[T])(implicit ev: T <:< TableRow) extends AnyVal { implicit class ZeppelinBQTableSCollection[T: ClassTag](val self: SCollection[T]) - (implicit ev: T <:< TableRow) { + (implicit ev: T <:< TableRow) { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from TableRow like SCollection */ def closeAndDisplay(schema: TableSchema): Unit = { val it = materialize(self).waitForResult().value - - if (it.isEmpty) { - println(SCollectionEmptyMsg) - } else { - import collection.JavaConverters._ - val fields = schema.getFields.asScala.map(_.getName).toArray - val header = fields.mkString(tab) - - val content = it.take(maxResults) 
- .map(r => fields.map(r.get).mkString(tab)) - .mkString(newline) - - println(s"$table $header$newline$content") - notifIfTruncated(it) - } + DisplayHelpers.displayBQTableRow(it, schema) } } diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/DisplayTapImplicits.scala b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayTapImplicits.scala new file mode 100644 index 00000000000..8aafc310a01 --- /dev/null +++ b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayTapImplicits.scala @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +import com.google.api.services.bigquery.model.TableSchema +import com.spotify.scio.bigquery.TableRow +import com.spotify.scio.io.Tap +import com.spotify.scio._ +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord + +import scala.concurrent.Future +import scala.reflect.ClassTag + +/** + * Implicit Zeppelin display helpers for [[Tap]] and [[Future]] of a [[Tap]]. 
+ */ +object DisplayTapImplicits { + + // TODO: scala 2.11 + // implicit class ZeppelinTap[T: ClassTag](private val self: Tap[T])(implicit ev: T <:< AnyVal) extends AnyVal { + implicit class ZeppelinTap[T: ClassTag](val self: Tap[T]) + (implicit ev: T <:< AnyVal) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of AnyVal. */ + def display(printer: (T) => String = (e: T) => e.toString): Unit = { + DisplayHelpers.displayAnyVal(self.value, printer) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureTap[T: ClassTag](private val self: Future[Tap[T]])(implicit ev: T <:< AnyVal) extends AnyVal { + implicit class ZeppelinFutureTap[T: ClassTag](val self: Future[Tap[T]]) + (implicit ev: T <:< AnyVal) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of AnyVal. */ + def waitAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { + ZeppelinTap(self.waitForResult()).display(printer) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinStringTap[T: ClassTag](private val self: Tap[T])(implicit ev: T <:< String) extends AnyVal { + implicit class ZeppelinStringTap[T: ClassTag](val self: Tap[T]) + (implicit ev: T <:< String) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of Strings. */ + def display(printer: (T) => String = (e: T) => e.toString): Unit = { + DisplayHelpers.displayString(self.value, printer) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureStringTap[T: ClassTag](private val self: Tap[T])(implicit ev: T <:< String) extends AnyVal { + implicit class ZeppelinFutureStringTap[T: ClassTag](val self: Future[Tap[T]]) + (implicit ev: T <:< String) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of Strings. 
*/ + def waitAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { + ZeppelinStringTap(self.waitForResult()).display(printer) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinKVTap[K: ClassTag, V: ClassTag](val self: Tap[(K, V)]) extends AnyVal { + implicit class ZeppelinKVTap[K: ClassTag, V: ClassTag](val self: Tap[(K, V)]) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of KV. */ + def display(): Unit = { + DisplayHelpers.displayKV(self.value) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureKVTap[K: ClassTag, V: ClassTag](val self: Future[Tap[(K, V)]]) extends AnyVal { + implicit class ZeppelinFutureKVTap[K: ClassTag, V: ClassTag](val self: Future[Tap[(K, V)]]) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of KV. */ + def waitAndDisplay(): Unit = { + ZeppelinKVTap(self.waitForResult()).display() + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinProductTap[T: ClassTag](val self: Tap[T])(implicit ev: T <:< Product) extends AnyVal { + implicit class ZeppelinProductTap[T: ClassTag](val self: Tap[T]) + (implicit ev: T <:< Product) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of Product. */ + def display(): Unit = { + DisplayHelpers.displayProduct(self.value) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureProductTap[T: ClassTag](val self: Future[Tap[T]])(implicit ev: T <:< Product) extends AnyVal { + implicit class ZeppelinFutureProductTap[T: ClassTag](val self: Future[Tap[T]]) + (implicit ev: T <:< Product) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of Product. 
*/ + def waitAndDisplay(): Unit = { + ZeppelinProductTap(self.waitForResult()).display() + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinAvroTap[T: ClassTag](val self: Tap[T])(implicit ev: T <:< GenericRecord) extends AnyVal { + implicit class ZeppelinAvroTap[T: ClassTag](val self: Tap[T]) + (implicit ev: T <:< GenericRecord) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of Avro. */ + def display(schema: Schema = null): Unit = { + DisplayHelpers.displayAvro(self.value, schema) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureAvroTap[T: ClassTag](val self: Future[Tap[T]])(implicit ev: T <:< GenericRecord) extends AnyVal { + implicit class ZeppelinFutureAvroTap[T: ClassTag](val self: Future[Tap[T]]) + (implicit ev: T <:< GenericRecord) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of Avro. */ + def waitAndDisplay(schema: Schema = null): Unit = { + ZeppelinAvroTap(self.waitForResult()).display(schema) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinBQTableTap[T: ClassTag](val self: Tap[T])(implicit ev: T <:< TableRow) extends AnyVal { + implicit class ZeppelinBQTableTap[T: ClassTag](val self: Tap[T]) + (implicit ev: T <:< TableRow) { + /** Convenience method to display [[com.spotify.scio.io.Tap]] of BigQuery TableRow. */ + def display(schema: TableSchema): Unit = { + DisplayHelpers.displayBQTableRow(self.value, schema) + } + } + + // TODO: scala 2.11 + // implicit class ZeppelinFutureBQTableTap[T: ClassTag](val self: Future[Tap[T]])(implicit ev: T <:< TableRow) extends AnyVal { + implicit class ZeppelinFutureBQTableTap[T: ClassTag](val self: Future[Tap[T]]) + (implicit ev: T <:< TableRow) { + /** Convenience method to display [[Future]] of a [[com.spotify.scio.io.Tap]] of BigQuery + * TableRow. 
*/ + def waitAndDisplay(schema: TableSchema): Unit = { + ZeppelinBQTableTap(self.waitForResult()).display(schema) + } + } + +} diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index a2ca20c4c0e..6f0f1947175 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -18,7 +18,7 @@ package org.apache.zeppelin.scio import java.beans.Introspector -import java.io.PrintStream} +import java.io.PrintStream import java.util.Properties import com.google.cloud.dataflow.sdk.options.{PipelineOptions, PipelineOptionsFactory} @@ -125,7 +125,8 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { REPL.settings_=(settings) REPL.createInterpreter() REPL.interpret(s"""val argz = Array("${argz.mkString("\", \"")}")""") - REPL.interpret("import org.apache.zeppelin.scio._") + REPL.interpret("import org.apache.zeppelin.scio.DisplaySCollectionImplicits._") + REPL.interpret("import org.apache.zeppelin.scio.DisplayTapImplicits._") } private def parseAndPartitionArgs(args: List[String]): (List[String], List[String]) = { From c0f8ccff4dd956762d08d86037070cc0a7bbe5b6 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Tue, 4 Oct 2016 12:44:50 -0400 Subject: [PATCH 19/36] Fix style and number of records for take --- .../apache/zeppelin/scio/DisplayHelpers.scala | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala index ea5195d0b5d..cdf718c5f7a 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/DisplayHelpers.scala @@ -29,15 +29,19 @@ import scala.reflect.ClassTag */ private[scio] object DisplayHelpers { - private val 
SCollectionEmptyMsg = "\n%html Result SCollection is empty!\n" + private[scio] val sCollectionEmptyMsg = + "\n%html Result SCollection is empty!\n" private val maxResults = Integer.getInteger("zeppelin.scio.maxResult", 1000) - private val tab = "\t" - private val newline = "\n" - private val table = "%table" + private[scio] val tab = "\t" + private[scio] val newline = "\n" + private[scio] val table = "%table" + private[scio] val rowLimitReachedMsg = + s"$newlineResults are limited to " + maxResults + s" rows.$newline" + private[scio] val bQSchemaIncomplete = + s"$newlineProvided BigQuery Schema has not fields!$newline" private def notifyIfTruncated(it: Iterator[_]): Unit = { - if(it.hasNext) - println(s"$newlineResults are limited to " + maxResults + s" rows.$newline") + if(it.hasNext) println(rowLimitReachedMsg) } /** @@ -45,7 +49,7 @@ private[scio] object DisplayHelpers { */ private[scio] def displayAnyVal[T: ClassTag](it: Iterator[T], printer: (T) => String): Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") notifyIfTruncated(it) @@ -57,7 +61,7 @@ private[scio] object DisplayHelpers { */ private[scio] def displayString[T: ClassTag](it: Iterator[T], printer: (T) => String): Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { println(s"$table value$newline${it.take(maxResults).map(printer).mkString(newline)}") notifyIfTruncated(it) @@ -69,7 +73,7 @@ private[scio] object DisplayHelpers { */ private[scio] def displayKV[K: ClassTag, V: ClassTag](it: Iterator[(K,V)]): Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { val content = it.take(maxResults).map{ case (k, v) => s"$k$tab$v" }.mkString(newline) println(s"$table key${tab}value$newline$content") @@ -83,7 +87,7 @@ private[scio] object DisplayHelpers { private[scio] def displayProduct[T: 
ClassTag](it: Iterator[T]) (implicit ev: T <:< Product): Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { val first = it.next() //TODO is this safe field name to value iterator? @@ -91,7 +95,7 @@ private[scio] object DisplayHelpers { val header = fieldNames.mkString(tab) val firstStr = first.productIterator.mkString(tab) - val content = it.take(maxResults).map(_.productIterator.mkString(tab)).mkString(newline) + val content = it.take(maxResults - 1).map(_.productIterator.mkString(tab)).mkString(newline) println(s"$table $header$newline$firstStr$newline$content") notifyIfTruncated(it) } @@ -104,7 +108,7 @@ private[scio] object DisplayHelpers { private[scio] def displayAvro[T: ClassTag](it: Iterator[T], schema: Schema = null) (implicit ev: T <:< GenericRecord): Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { val first = it.next() import collection.JavaConverters._ @@ -117,7 +121,7 @@ private[scio] object DisplayHelpers { val header = fieldNames.mkString(tab) val firstStr = fieldNames.map(first.get).mkString(tab) - val content = it.take(maxResults) + val content = it.take(maxResults - 1) .map(r => fieldNames.map(r.get).mkString(tab)) .mkString(newline) println(s"$table $header$newline$firstStr$newline$content") @@ -131,18 +135,25 @@ private[scio] object DisplayHelpers { private[scio] def displayBQTableRow[T: ClassTag](it: Iterator[T], schema: TableSchema) (implicit ev: T <:< TableRow) : Unit = { if (it.isEmpty) { - println(SCollectionEmptyMsg) + println(sCollectionEmptyMsg) } else { import collection.JavaConverters._ - val fields = schema.getFields.asScala.map(_.getName).toArray - val header = fields.mkString(tab) + val fieldsOp = Option(schema.getFields) + fieldsOp match { + case None => println(bQSchemaIncomplete) + case Some(f) => { + val fields = f.asScala.map(_.getName).toArray - val content = it.take(maxResults) - .map(r => fields.map(r.get).mkString(tab)) - 
.mkString(newline) + val header = fields.mkString(tab) - println(s"$table $header$newline$content") - notifyIfTruncated(it) + val content = it.take(maxResults) + .map(r => fields.map(r.get).mkString(tab)) + .mkString(newline) + + println(s"$table $header$newline$content") + notifyIfTruncated(it) + } + } } } From e6356749cad2688c1597ac4331dea4828303cf35 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Tue, 4 Oct 2016 12:45:35 -0400 Subject: [PATCH 20/36] Add tests for DisplayHelpers --- scio/pom.xml | 47 ++ scio/src/test/avro/schema.avsc | 12 + .../scio/DisplayHelpersTestScala211.scala | 55 ++ .../apache/zeppelin/scio/TestCCScala211.scala | 22 + .../zeppelin/scio/DisplayHelpersTest.scala | 548 ++++++++++++++++++ .../org/apache/zeppelin/scio/TestCC.scala | 24 + .../apache/zeppelin/scio/util/TestUtils.scala | 50 ++ 7 files changed, 758 insertions(+) create mode 100644 scio/src/test/avro/schema.avsc create mode 100644 scio/src/test/scala-2.11/org/apache/zeppelin/scio/DisplayHelpersTestScala211.scala create mode 100644 scio/src/test/scala-2.11/org/apache/zeppelin/scio/TestCCScala211.scala create mode 100644 scio/src/test/scala/org/apache/zeppelin/scio/DisplayHelpersTest.scala create mode 100644 scio/src/test/scala/org/apache/zeppelin/scio/TestCC.scala create mode 100644 scio/src/test/scala/org/apache/zeppelin/scio/util/TestUtils.scala diff --git a/scio/pom.xml b/scio/pom.xml index 742c78ecf79..7cae0a5cbeb 100644 --- a/scio/pom.xml +++ b/scio/pom.xml @@ -221,6 +221,27 @@ 1.9 + + + org.scalatest + scalatest_${scala.binary.version} + ${scalatest.version} + test + + + + junit + junit + test + + + + com.spotify + scio-test_${scala.binary.version} + ${scio.version} + test + + @@ -336,6 +357,32 @@ + + + + org.apache.avro + avro-maven-plugin + 1.7.7 + + + generate-sources + + schema + + + ${project.basedir}/src/test/avro/ + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.7 + 1.7 + + diff --git a/scio/src/test/avro/schema.avsc b/scio/src/test/avro/schema.avsc 
new file mode 100644 index 00000000000..07c3bea8884 --- /dev/null +++ b/scio/src/test/avro/schema.avsc @@ -0,0 +1,12 @@ +{ + "type": "record", + "name": "Account", + "namespace": "org.apache.zeppelin.scio.avro", + "doc": "Record for an account", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "type", "type": "string"}, + {"name": "name", "type": "string"}, + {"name": "amount", "type": "double"} + ] +} diff --git a/scio/src/test/scala-2.11/org/apache/zeppelin/scio/DisplayHelpersTestScala211.scala b/scio/src/test/scala-2.11/org/apache/zeppelin/scio/DisplayHelpersTestScala211.scala new file mode 100644 index 00000000000..729dc87f5c3 --- /dev/null +++ b/scio/src/test/scala-2.11/org/apache/zeppelin/scio/DisplayHelpersTestScala211.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +import org.apache.zeppelin.scio.util.TestUtils +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner +import org.scalatest.{FlatSpec, Matchers} + +/** + * Scala 2.11 DisplayHelpersTest tests. 
+ * + * Most tests have test scope implicit imports due to scala 2.10 bug + * https://issues.scala-lang.org/browse/SI-3346 + * + * Note: we can't depend on the order of data coming from SCollection. + */ +@RunWith(classOf[JUnitRunner]) +class DisplayHelpersTestScala211 extends FlatSpec with Matchers { + import TestUtils._ + + // ----------------------------------------------------------------------------------------------- + // Product SCollection Tests + // ----------------------------------------------------------------------------------------------- + + it should "support SCollection of Case Class of 23" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val tupleHeader = s"$table " + (1 to 22).map(i => s"a$i$tab").mkString + "a23" + val o = captureOut { + sideEffectWithData( + Seq.fill(3)(CC23(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(tupleHeader) ++ + Seq.fill(3)((1 to 22).map(i => s"$i$tab").mkString + "23")) + o.head should be(tupleHeader) + } + +} diff --git a/scio/src/test/scala-2.11/org/apache/zeppelin/scio/TestCCScala211.scala b/scio/src/test/scala-2.11/org/apache/zeppelin/scio/TestCCScala211.scala new file mode 100644 index 00000000000..eca120598fe --- /dev/null +++ b/scio/src/test/scala-2.11/org/apache/zeppelin/scio/TestCCScala211.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +case class CC23(a1: Int, a2: Int, a3: Int, a4: Int, a5: Int, a6: Int, a7: Int, a8: Int, a9: Int, + a10: Int, a11: Int, a12: Int, a13: Int, a14: Int, a15: Int, a16: Int, a17: Int, + a18: Int, a19: Int, a20: Int, a21: Int, a22: Int, a23: Int) \ No newline at end of file diff --git a/scio/src/test/scala/org/apache/zeppelin/scio/DisplayHelpersTest.scala b/scio/src/test/scala/org/apache/zeppelin/scio/DisplayHelpersTest.scala new file mode 100644 index 00000000000..1ba4c7e90c7 --- /dev/null +++ b/scio/src/test/scala/org/apache/zeppelin/scio/DisplayHelpersTest.scala @@ -0,0 +1,548 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zeppelin.scio + +import com.google.api.services.bigquery.model.{TableFieldSchema, TableSchema} +import com.spotify.scio.bigquery._ +import org.apache.avro.Schema +import org.apache.avro.Schema.Parser +import org.apache.avro.generic.{GenericData, GenericRecord} +import org.apache.zeppelin.scio.avro.Account +import org.apache.zeppelin.scio.util.TestUtils +import org.junit.runner.RunWith +import org.scalatest.junit.JUnitRunner +import org.scalatest.{FlatSpec, Matchers} + +/** + * DisplayHelpersTest tests. + * + * Most tests have test scope implicit imports due to scala 2.10 bug + * https://issues.scala-lang.org/browse/SI-3346 + * + * Note: we can't depend on the order of data coming from SCollection. + */ +@RunWith(classOf[JUnitRunner]) +class DisplayHelpersTest extends FlatSpec with Matchers { + private val testRowLimit = 20 + sys.props("zeppelin.scio.maxResult") = 20.toString + + import TestUtils._ + + // ----------------------------------------------------------------------------------------------- + // AnyVal SCollection Tests + // ----------------------------------------------------------------------------------------------- + + private val anyValHeader = s"$table value" + + "DisplayHelpers" should "support Integer SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1, 2, 3)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1", + "2", + "3") + o.head should be(anyValHeader) + } + + it should "support Long SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1L, 2L, 3L)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1", + "2", + "3") + o.head should be(anyValHeader) + } + + it should "support Double SCollection via AnyVal" in { + 
import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1.0D, 2.0D, 3.0D)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1.0", + "2.0", + "3.0") + o.head should be(anyValHeader) + } + + it should "support Float SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1.0F, 2.0F, 3.0F)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1.0", + "2.0", + "3.0") + o.head should be(anyValHeader) + } + + it should "support Short SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1.toShort, 2.toShort, 3.toShort)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1", + "2", + "3") + o.head should be(anyValHeader) + } + + it should "support Byte SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(1.toByte, 2.toByte, 3.toByte)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "1", + "2", + "3") + o.head should be(anyValHeader) + } + + it should "support Boolean SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq(true, false, true)) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "true", + "false", + "true") + o.head should be(anyValHeader) + } + + it should "support Char SCollection via AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq('a', 'b', 'c')) { in => + 
in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(anyValHeader, + "a", + "b", + "c") + o.head should be(anyValHeader) + } + + it should "support SCollection of AnyVal over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(1 to 21) { in => + in.closeAndDisplay() + } + } + o.size should be > testRowLimit + o.head should be(anyValHeader) + o.last should be(rowLimitReached) + } + + it should "support empty SCollection of AnyVal" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinSCollection + val o = captureOut { + sideEffectWithData(Seq.empty[AnyVal]) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + // ----------------------------------------------------------------------------------------------- + // String SCollection Tests + // ----------------------------------------------------------------------------------------------- + + private val stringHeader = s"$table value" + + it should "support String SCollection" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinStringSCollection + val o = captureOut { + sideEffectWithData(Seq("a","b","c")) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(stringHeader, + "a", + "b", + "c") + o.head should be (stringHeader) + } + + it should "support empty SCollection of String" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinStringSCollection + val o = captureOut { + sideEffectWithData(Seq.empty[String]) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + it should "support SCollection of String over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinStringSCollection + val o = captureOut { + 
sideEffectWithData(Seq.fill(21)("a")) { in => + in.closeAndDisplay() + } + } + o.size should be > testRowLimit + o.head should be(stringHeader) + o.last should be(rowLimitReached) + } + + // ----------------------------------------------------------------------------------------------- + // KV SCollection Tests + // ----------------------------------------------------------------------------------------------- + + private val kvHeader = s"$table key${tab}value" + + it should "support KV (ints) SCollection" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinKVSCollection + val o = captureOut { + sideEffectWithData(Seq((1,2), (3,4))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(kvHeader, + s"3${tab}4", + s"1${tab}2") + o.head should be (kvHeader) + } + + it should "support KV (str keys) SCollection" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinKVSCollection + val o = captureOut { + sideEffectWithData(Seq(("foo",2), ("bar",4))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(kvHeader, + s"foo${tab}2", + s"bar${tab}4") + o.head should be (kvHeader) + } + + it should "support KV (str values) SCollection" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinKVSCollection + val o = captureOut { + sideEffectWithData(Seq((2,"foo"), (4,"bar"))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs Seq(kvHeader, + s"2${tab}foo", + s"4${tab}bar") + o.head should be (kvHeader) + } + + it should "support empty KV SCollection" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinKVSCollection + captureOut { + sideEffectWithData(Seq.empty[(Int, Int)]) { in => + in.closeAndDisplay() + } + } should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + it should "support SCollection of KV over row limit" in { + import 
org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinKVSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(21)(("foo", 1))) { in => + in.closeAndDisplay() + } + } + o.size should be > testRowLimit + o.head should be(kvHeader) + o.last should be(rowLimitReached) + } + + // ----------------------------------------------------------------------------------------------- + // Product SCollection Tests + // ----------------------------------------------------------------------------------------------- + + private val testCaseClassHeader = s"$table foo${tab}bar${tab}a" + + it should "support SCollection of Tuple of 3" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val tupleHeader = s"$table _1${tab}_2${tab}_3" + val o = captureOut { + sideEffectWithData(Seq.fill(3)((1,2,3))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(tupleHeader) ++ Seq.fill(3)(s"1${tab}2${tab}3")) + o.head should be(tupleHeader) + } + + it should "support SCollection of Tuple of 22" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val tupleHeader = s"$table " + (1 to 21).map(i => s"_$i$tab").mkString + "_22" + val o = captureOut { + sideEffectWithData( + Seq.fill(3)((1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(tupleHeader) ++ + Seq.fill(3)((1 to 21).map(i => s"$i$tab").mkString + "22")) + o.head should be(tupleHeader) + } + + it should "support SCollection of Case Class of 22" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val tupleHeader = s"$table " + (1 to 21).map(i => s"a$i$tab").mkString + "a22" + val o = captureOut { + sideEffectWithData( + Seq.fill(3)(CC22(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs 
(Seq(tupleHeader) ++ + Seq.fill(3)((1 to 21).map(i => s"$i$tab").mkString + "22")) + o.head should be(tupleHeader) + } + + it should "support SCollection of Case Class" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(3)(TestCaseClass(1, "foo", 2.0D))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(testCaseClassHeader) ++ + Seq.fill(3)(s"1${tab}foo${tab}2.0")) + o.head should be(testCaseClassHeader) + } + + it should "support empty SCollection of Product" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + captureOut { + sideEffectWithData(Seq.empty[Product]) { in => + in.closeAndDisplay() + } + } should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + it should "support SCollection of Product over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinProductSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(21)(TestCaseClass(1, "foo", 2.0D))) { in => + in.closeAndDisplay() + } + } + + o.size should be > testRowLimit + o.head should be(testCaseClassHeader) + o.last should be(rowLimitReached) + } + + // ----------------------------------------------------------------------------------------------- + // Avro SCollection Tests + // ----------------------------------------------------------------------------------------------- + + import scala.collection.JavaConverters._ + + private val schema = { + def f(name: String, tpe: Schema.Type) = + new Schema.Field( + name, + Schema.createUnion(List(Schema.create(Schema.Type.NULL), Schema.create(tpe)).asJava), + null, null) + + val s = Schema.createRecord("GenericAccountRecord", null, null, false) + s.setFields(List( + f("id", Schema.Type.INT), + f("amount", Schema.Type.DOUBLE), + f("name", Schema.Type.STRING), + f("type", Schema.Type.STRING) + ).asJava) + s + } + + private 
def getTestGenericAvro(i: Int): GenericRecord = { + val s: Schema = new Parser().parse(schema.toString) + val r = new GenericData.Record(s) + r.put("id", i) + r.put("amount", i.toDouble) + r.put("name", "user" + i) + r.put("type", "checking") + r + } + + private def getTestAccountAvro(): Account = { + Account.newBuilder() + .setId(2) + .setAmount(2.0D) + .setName("user2") + .setType("checking") + .build() + } + + private val avroGenericRecordHeader = s"$table id${tab}amount${tab}name${tab}type" + private val avroAccountHeader = s"$table id${tab}type${tab}name${tab}amount" + + it should "support SCollection of GenericRecord" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(3)(getTestGenericAvro(1))) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(avroGenericRecordHeader) ++ + Seq.fill(3)(s"1${tab}1.0${tab}user1${tab}checking")) + o.head should be(avroGenericRecordHeader) + } + + it should "support SCollection of SpecificRecord Avro" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + + val o = captureOut { + sideEffectWithData(Seq.fill(3)(getTestAccountAvro())) { in => + in.closeAndDisplay() + } + } + o should contain theSameElementsAs (Seq(avroAccountHeader) ++ + Seq.fill(3)(s"2${tab}checking${tab}user2${tab}2.0")) + o.head should be(avroAccountHeader) + } + + it should "support empty SCollection of SpecificRecord Avro" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + captureOut { + sideEffectWithData(Seq.empty[Account]) { in => + in.closeAndDisplay() + } + } should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + it should "support empty SCollection of GenericRecord Avro" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + captureOut { + 
sideEffectWithData(Seq.empty[GenericRecord]) { in => + in.closeAndDisplay() + } + } should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + + it should "support SCollection of GenericRecord Avro over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(21)(getTestGenericAvro(1))) { in => + in.closeAndDisplay() + } + } + + o.size should be > testRowLimit + o.head should be(avroGenericRecordHeader) + o.last should be(rowLimitReached) + } + + it should "support SCollection of SpecificRecord Avro over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinAvroSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(21)(getTestAccountAvro())) { in => + in.closeAndDisplay() + } + } + + o.size should be > testRowLimit + o.head should be(avroAccountHeader) + o.last should be(rowLimitReached) + } + + // ----------------------------------------------------------------------------------------------- + // TableRow SCollection Tests + // ----------------------------------------------------------------------------------------------- + + private val bQSchema = new TableSchema().setFields(List( + new TableFieldSchema().setName("id").setType("INTEGER"), + new TableFieldSchema().setName("amount").setType("FLOAT"), + new TableFieldSchema().setName("type").setType("STRING"), + new TableFieldSchema().setName("name").setType("STRING") + ).asJava) + + private val bQHeader = s"$table id${tab}amount${tab}type${tab}name" + + private def getBQTableRow(): TableRow = { + TableRow("id" -> 3, "amount" -> 3.0D, "type" -> "checking", "name" -> "user3") + } + + it should "support SCollection of TableRow" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinBQTableSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(3)(getBQTableRow())) { in => + in.closeAndDisplay(bQSchema) + } + } + o should 
contain theSameElementsAs (Seq(bQHeader) ++ + Seq.fill(3)(s"3${tab}3.0${tab}checking${tab}user3")) + o.head should be(bQHeader) + } + + it should "print error on empty BQ schema" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinBQTableSCollection + captureOut { + sideEffectWithData(Seq.fill(3)(getBQTableRow())) { in => + in.closeAndDisplay(new TableSchema()) + } + } should contain theSameElementsAs DisplayHelpers.bQSchemaIncomplete.split(newline) + } + + it should "support SCollection of TableRow over row limit" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinBQTableSCollection + val o = captureOut { + sideEffectWithData(Seq.fill(21)(getBQTableRow())) { in => + in.closeAndDisplay(bQSchema) + } + } + + o.size should be > testRowLimit + o.head should be(bQHeader) + o.last should be(rowLimitReached) + } + + it should "support empty SCollection of TableRow" in { + import org.apache.zeppelin.scio.DisplaySCollectionImplicits.ZeppelinBQTableSCollection + captureOut { + sideEffectWithData(Seq.empty[TableRow]) { in => + in.closeAndDisplay(new TableSchema()) + } + } should contain theSameElementsAs DisplayHelpers.sCollectionEmptyMsg.split(newline) + } + +} diff --git a/scio/src/test/scala/org/apache/zeppelin/scio/TestCC.scala b/scio/src/test/scala/org/apache/zeppelin/scio/TestCC.scala new file mode 100644 index 00000000000..8928b9900b9 --- /dev/null +++ b/scio/src/test/scala/org/apache/zeppelin/scio/TestCC.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +case class TestCaseClass(foo: Int, bar: String, a: Double) + +case class CC22(a1: Int, a2: Int, a3: Int, a4: Int, a5: Int, a6: Int, a7: Int, a8: Int, a9: Int, + a10: Int, a11: Int, a12: Int, a13: Int, a14: Int, a15: Int, a16: Int, a17: Int, + a18: Int, a19: Int, a20: Int, a21: Int, a22: Int) \ No newline at end of file diff --git a/scio/src/test/scala/org/apache/zeppelin/scio/util/TestUtils.scala b/scio/src/test/scala/org/apache/zeppelin/scio/util/TestUtils.scala new file mode 100644 index 00000000000..72271b8df7f --- /dev/null +++ b/scio/src/test/scala/org/apache/zeppelin/scio/util/TestUtils.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.zeppelin.scio.util + +import java.io.{ByteArrayOutputStream, PrintStream} + +import com.google.common.base.Charsets +import com.spotify.scio.ScioContext +import com.spotify.scio.values.SCollection +import org.apache.zeppelin.scio.DisplayHelpers + +import scala.reflect.ClassTag + +object TestUtils { + val tab = DisplayHelpers.tab + val newline = DisplayHelpers.newline + val table = DisplayHelpers.table + val rowLimitReached = DisplayHelpers.rowLimitReachedMsg.replaceAll(newline,"") + + private[scio] def sideEffectWithData[T: ClassTag](data: Iterable[T]) + (fn: SCollection[T] => Unit): Unit = { + val sc = ScioContext() + fn(sc.parallelize(data)) + if (!sc.isClosed) sc.close() + } + + private[scio] def captureOut[T](body: => T): Seq[String] = { + val bytes = new ByteArrayOutputStream() + val stream = new PrintStream(bytes) + Console.withOut(stream) { body } + bytes.toString(Charsets.UTF_8.toString).split(DisplayHelpers.newline) + } + + +} From dcbb1977bb08286da653b73d8e24ac9e12f525d1 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Tue, 4 Oct 2016 23:28:24 -0400 Subject: [PATCH 21/36] Add documentation link --- docs/_includes/themes/zeppelin/_navigation.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index 1b1fdbf1035..07533c4cfc6 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -67,6 +67,7 @@
  • Postgresql, HAWQ
  • R
  • Scalding
  • +
  • Scio
  • Shell
  • Spark
  • From bd4df5e122be2cd42b258910bc3dd371747aba61 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Tue, 4 Oct 2016 23:28:34 -0400 Subject: [PATCH 22/36] Fix documentation style --- docs/interpreter/scio.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index b12c7c91303..01a8576add8 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -36,7 +36,7 @@ Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPl zeppelin.scio.argz --runner=InProcessPipelineRunner - Scio Pipeline runner + Scio interpreter wide arguments zeppelin.scio.maxResult @@ -52,7 +52,7 @@ In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and s ## Using the Scio Interpreter -In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). Context is shared among all *Scio* paragraphs. There is special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a context via standard `ContextAndArgs`. +In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). Context is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a context via standard `ContextAndArgs`. ```scala %scio @@ -80,12 +80,11 @@ There can be only one paragraph running at a time. There is no notion of overall Scio interpreter comes with display helpers to ease working with Zeppelin notebooks. Simply use `closeAndDisplay()` on `SCollection` to close context and display the results. The number of results is limited by `zeppelin.scio.maxResult` (by default 1000). 
Supported `SCollection` types: + * Scio's typed BigQuery - * Scala's case classes - * Scala's tuples + * Scala's Products (case classes, tuples) * Google BigQuery's TableRow * Apache Avro - * All Scala's `Product` like data * All Scala's `AnyVal` #### BigQuery example: From 32751859b00b5f0b98e9e785e815849a74963166 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Wed, 5 Oct 2016 19:46:03 -0400 Subject: [PATCH 23/36] Add license --- zeppelin-distribution/src/bin_license/LICENSE | 111 ++++++++++++++---- .../licenses/LICENSE-java-lsh-0.10 | 7 ++ .../LICENSE-java-string-similarity-0.12 | 7 ++ .../bin_license/licenses/LICENSE-jsoup-1.6.1 | 21 ++++ .../licenses/LICENSE-jtransforms-2.4.0 | 33 ++++++ .../licenses/LICENSE-junit-interface-0.11 | 24 ++++ .../licenses/LICENSE-kryo-shaded-3.0.3 | 10 ++ .../licenses/LICENSE-minilog-1.3.0 | 10 ++ .../licenses/LICENSE-sbt-test-interface-1.0 | 25 ++++ .../LICENSE-scalamacros-paradise-2.1.0 | 27 +++++ .../LICENSE-scalamacros-quasiquotes-2.1.0 | 27 +++++ .../licenses/LICENSE-slf4j-simple-1.7.21 | 21 ++++ .../bin_license/licenses/LICENSE-spire-0.7.4 | 19 +++ .../licenses/LICENSE-spire-macros-0.7.4 | 19 +++ 14 files changed, 340 insertions(+), 21 deletions(-) create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-java-lsh-0.10 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-java-string-similarity-0.12 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-jsoup-1.6.1 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-jtransforms-2.4.0 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-junit-interface-0.11 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-kryo-shaded-3.0.3 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-minilog-1.3.0 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-sbt-test-interface-1.0 create mode 100644 
zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-paradise-2.1.0 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-quasiquotes-2.1.0 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-slf4j-simple-1.7.21 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-0.7.4 create mode 100644 zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-macros-0.7.4 diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE index 82e2c6cd7b8..bab53fa3a76 100644 --- a/zeppelin-distribution/src/bin_license/LICENSE +++ b/zeppelin-distribution/src/bin_license/LICENSE @@ -1,13 +1,9 @@ +The following components are provided under Apache License. + (Apache 2.0) nvd3.js v1.7.1 (http://nvd3.org/) - https://github.com/novus/nvd3/blob/v1.7.1/LICENSE.md (Apache 2.0) gson v2.2 (com.google.code.gson:gson:jar:2.2 - https://github.com/google/gson) - https://github.com/google/gson/blob/gson-2.2/LICENSE (Apache 2.0) Amazon Web Services SDK for Java v1.10.62 (https://aws.amazon.com/sdk-for-java/) - https://raw.githubusercontent.com/aws/aws-sdk-java/1.10.62/LICENSE.txt (Apache 2.0) JavaEWAH v0.7.9 (https://github.com/lemire/javaewah) - https://github.com/lemire/javaewah/blob/master/LICENSE-2.0.txt - - - -The following components are provided under Apache License. - - (Apache 2.0) Apache Commons Logging (commons-logging:commons-logging:1.1.1 - http://commons.apache.org/proper/commons-logging/) (Apache 2.0) Apache Commons Codec (commons-codec:commons-codec:1.5 - http://commons.apache.org/proper/commons-codec/) (Apache 2.0) Apache Commons Collections (commons-collections:commons-collections:3.2.1 - http://commons.apache.org/proper/commons-configuration/) @@ -19,7 +15,7 @@ The following components are provided under Apache License. 
(Apache 2.0) Http Components (org.apache.httpcomponents:httpclient:4.3.6 - https://github.com/apache/httpclient) (Apache 2.0) Apache Commons Lang (org.apache.commons:commons-lang:2.5 - http://commons.apache.org/proper/commons-lang/) (Apache 2.0) Apache Commons Lang 3 (org.apache.commons:commons-lang3:3.4 - http://commons.apache.org/proper/commons-lang/) - (Apache 2.0) Apache Commons Math 3 (org.apache.commons:commons-math3:3.4.1 - http://commons.apache.org/proper/commons-math/) + (Apache 2.0) Apache Commons Math 3 (org.apache.commons:commons-math3:3.6.1 - http://commons.apache.org/proper/commons-math/) (Apache 2.0) Apache Commons Net (commons-net:commons-net:2.2 - http://commons.apache.org/proper/commons-net/) (Apache 2.0) Apache log4j (log4j:log4j:1.2.17 - http://logging.apache.org/log4j/1.2/) (Apache 2.0) Apache Commons Pool2 (commons-exec:commons-pool2:2.3 - https://commons.apache.org/proper/commons-pool/) @@ -43,17 +39,24 @@ The following components are provided under Apache License. 
(Apache 2.0) Apache Lens (http://lens.apache.org/) (Apache 2.0) Apache Flink (http://flink.apache.org/) (Apache 2.0) Apache Beam (http://beam.apache.org/) - (Apache 2.0) Apache Thrift (http://thrift.apache.org/) + (Apache 2.0) Apache Thrift 0.9.2 (org.apache.thrift:libthrift:0.9.2 - http://thrift.apache.org/) (Apache 2.0) Apache Lucene (https://lucene.apache.org/) (Apache 2.0) Apache Zookeeper (org.apache.zookeeper:zookeeper:jar:3.4.5 - http://zookeeper.apache.org/) - (Apache 2.0) Chill (com.twitter:chill-java:jar:0.8.0 - https://github.com/twitter/chill/) + (Apache 2.0) Chill (com.twitter:chill:0.8.0 - https://github.com/twitter/chill/) + (Apache 2.0) Chill Java (com.twitter:chill-java:jar:0.8.0 - https://github.com/twitter/chill/) + (Apache 2.0) Chill Protobuf (com.twitter:chill-protobuf:0.8.0 - https://github.com/twitter/chill/) (Apache 2.0) QDox (com.thoughtworks.qdox:qdox:jar:2.0-M3 - https://github.com/paul-hammant/qdox/) (Apache 2.0) Codehaus Plexus (org.codehaus.plexus:plexus:jar:1.5.6 - https://codehaus-plexus.github.io/) + (Apache 2.0) Codehaus Plexus Interpolation (org.codehaus.plexus:plexus-interpolation:1.14 - https://codehaus-plexus.github.io/) + (Apache 2.0) Codehaus Plexus Component Annotations (org.codehaus.plexus:plexus-component-annotations:1.5.5 - https://codehaus-plexus.github.io/) + (Apache 2.0) Codehaus Plexus Classworlds (org.codehaus.plexus:plexus-classworlds:2.4 - https://codehaus-plexus.github.io/) + (Apache 2.0) Codehaus Plexus Utils (org.codehaus.plexus:plexus-utils:2.0.7 - https://codehaus-plexus.github.io/) (Apache 2.0) findbugs jsr305 (com.google.code.findbugs:jsr305:jar:1.3.9 - http://findbugs.sourceforge.net/) (Apache 2.0) Google Guava (com.google.guava:guava:15.0 - https://code.google.com/p/guava-libraries/) - (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.5.3 - https://github.com/FasterXML/jackson-core) - (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.5.3 -
https://github.com/FasterXML/jackson-core) - (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.5.3 - https://github.com/FasterXML/jackson-core) + (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-core:2.7.0 - https://github.com/FasterXML/jackson-core) + (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-annotations:2.7.0 - https://github.com/FasterXML/jackson-core) + (Apache 2.0) Jackson (com.fasterxml.jackson.core:jackson-databind:2.7.0 - https://github.com/FasterXML/jackson-core) + (Apache 2.0) Jackson Mapper ASL (org.codehaus.jackson:jackson-mapper-asl:1.9.13 - https://mvnrepository.com/artifact/org.codehaus.jackson/jackson-mapper-asl/1.9.13) (Apache 2.0) javax.servlet (org.eclipse.jetty.orbit:javax.servlet:jar:3.1.0.v201112011016 - http://www.eclipse.org/jetty) (Apache 2.0) Joda-Time (joda-time:joda-time:2.8.1 - http://www.joda.org/joda-time/) (Apache 2.0) Jackson (org.codehaus.jackson:jackson-core-asl:1.9.13 - http://jackson.codehaus.org/) @@ -92,7 +95,7 @@ The following components are provided under Apache License. (Apache 2.0) Lucene Spatial 3D (org.apache.lucene:lucene-spatial3d:5.3.1 - http://lucene.apache.org/lucene-parent/lucene-spatial3d) (Apache 2.0) Lucene Suggest (org.apache.lucene:lucene-suggest:5.3.1 - http://lucene.apache.org/lucene-parent/lucene-suggest) (Apache 2.0) Elasticsearch: Core (org.elasticsearch:elasticsearch:2.1.0 - http://nexus.sonatype.org/oss-repository-hosting.html/parent/elasticsearch) - (Apache 2.0) Joda convert (org.joda:joda-convert:1.2 - http://joda-convert.sourceforge.net) + (Apache 2.0) Joda convert (org.joda:joda-convert:1.8.1 - http://joda-convert.sourceforge.net) (Apache 2.0) Shiro Core (org.apache.shiro:shiro-core:1.2.3 - https://shiro.apache.org) (Apache 2.0) Shiro Web (org.apache.shiro:shiro-web:1.2.3 - https://shiro.apache.org) (Apache 2.0) SnakeYAML (org.yaml:snakeyaml:1.15 - http://www.snakeyaml.org) @@ -121,9 +124,10 @@ The following components are provided under Apache License. 
(Apache 2.0) parboiled-core (org.parboiled:parboiled-core:1.1.7 - https://github.com/sirthias/parboiled) (Apache 2.0) ZkClient (com.101tec:zkclient:0.7 - https://github.com/sgroschupf/zkclient) (Apache 2.0) jackson-module-scala (com.fasterxml.jackson.module:jackson-module-scala_2.10:2.4.4 - http://wiki.fasterxml.com/JacksonModuleScala) - (Apache 2.0) BigQuery API v2-rev295-1.22.0 (com.google.apis:google-api-services-bigquery:v2-rev295-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-bigquery) + (Apache 2.0) BigQuery API v2-rev317-1.22.0 (com.google.apis:google-api-services-bigquery:v2-rev317-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-bigquery) (Apache 2.0) Google Cloud Debugger API v2-rev8-1.22.0 (com.google.apis:google-api-services-clouddebugger:v2-rev8-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-clouddebugger) - (Apache 2.0) Google Dataflow API v1b3-rev30-1.22.0 (com.google.apis:google-api-services-dataflow:v1b3-rev30-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-dataflow) + (Apache 2.0) Google Dataflow API v1b3-rev36-1.22.0 (com.google.apis:google-api-services-dataflow:v1b3-rev36-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-dataflow) + (Apache 2.0) Google Dataflow SDK 1.7.0 (com.google.cloud.dataflow:google-cloud-dataflow-java-sdk-all:1.7.0 - https://github.com/GoogleCloudPlatform/DataflowJavaSDK) (Apache 2.0) Google Cloud Pub/Sub API v1-rev10-1.22.0 (com.google.apis:google-api-services-pubsub:v1-rev10-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-pubsub) (Apache 2.0) Cloud Storage JSON API v1-rev71-1.22.0 (com.google.apis:google-api-services-storage:v1-rev71-1.22.0 - http://nexus.sonatype.org/oss-repository-hosting.html/google-api-services-storage) (Apache 2.0) gcsio.jar (com.google.cloud.bigdataoss:gcsio:1.4.5 - 
https://github.com/GoogleCloudPlatform/BigData-interop/gcsio/) @@ -165,6 +169,52 @@ The following components are provided under Apache License. (Apache 2.0) tez-yarn-timeline-history-with-acls (org.apache.tez:tez-yarn-timeline-history-with-acls:0.7.0 - http://tez.apache.org) (Apache 2.0) jna (net.java.dev.jna:jna:4.1.0 https://github.com/java-native-access/jna) (Apache 2.0) MathJax v2.7.0 - https://github.com/mathjax/MathJax/blob/2.7.0/LICENSE + (Apache 2.0) Scio REPL 0.2.4 (com.spotify:scio-repl:0.2.4 - https://github.com/spotify/scio) + (Apache 2.0) Scio BigQuery 0.2.4 (com.spotify:scio-bigquery:0.2.4 - https://github.com/spotify/scio) + (Apache 2.0) Scio Core 0.2.4 (com.spotify:scio-core:0.2.4 - https://github.com/spotify/scio) + (Apache 2.0) Scio Extra 0.2.4 (com.spotify:scio-extra:0.2.4 - https://github.com/spotify/scio) + (Apache 2.0) Scio Test 0.2.4 (com.spotify:scio-test:0.2.4 - https://github.com/spotify/scio) + (Apache 2.0) Netty Http2 Codec 4.1.0.CR1 (io.netty:netty-codec-http2:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Http Codec 4.1.0.CR1 (io.netty:netty-codec-http:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Handler 4.1.0.CR1 (io.netty:netty-handler:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Buffer 4.1.0.CR1 (io.netty:netty-buffer:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Common 4.1.0.CR1 (io.netty:netty-common:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Transport 4.1.0.CR1 (io.netty:netty-transport:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Resolver 4.1.0.CR1 (io.netty:netty-resolver:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Netty Codec 4.1.0.CR1 (io.netty:netty-codec:4.1.0.CR1 - https://github.com/netty/netty) + (Apache 2.0) Pubsub v1 GRPC Proto 0.0.2 (com.google.api.grpc:grpc-pubsub-v1:0.0.2 - https://github.com/googleapis/googleapis) + (Apache 2.0) Core GRPC Proto 0.0.3 
(com.google.api.grpc:grpc-core-proto:0.0.3 - https://github.com/googleapis/googleapis) + (Apache 2.0) Bigtable Proto 0.3.0 (com.google.cloud.bigtable:bigtable-protos:0.3.0 - https://github.com/googleapis/googleapis) + (Apache 2.0) Java Google API Client 1.22.0 (com.google.api-client:google-api-client:1.22.0 - https://github.com/googleapis/googleapis) + (Apache 2.0) Java6 Google API Client 1.22.0 (com.google.api-client:google-api-client-java6:1.22.0 - https://github.com/googleapis/googleapis) + (Apache 2.0) Jackson2 Google API Client 1.22.0 (com.google.api-client:google-api-client-jackson2:1.22.0 - https://github.com/googleapis/googleapis) + (Apache 2.0) Google HTTP Client 1.22.0 (com.google.http-client:google-http-client:1.22.0 - https://github.com/google/google-http-java-client) + (Apache 2.0) Jackson Google HTTP Client 1.22.0 (com.google.http-client:google-http-client-jackson:1.22.0 - https://github.com/google/google-http-java-client) + (Apache 2.0) Jackson2 Google HTTP Client 1.22.0 (com.google.http-client:google-http-client-jackson2:1.22.0 - https://github.com/google/google-http-java-client) + (Apache 2.0) Protobuf Google HTTP Client 1.22.0 (com.google.http-client:google-http-client-protobuf:1.22.0 - https://github.com/google/google-http-java-client) + (Apache 2.0) Google OAuth Java6 Client 1.22.0 (com.google.oauth-client:google-oauth-client-java6:1.22.0 - https://github.com/google/google-oauth-java-client) + (Apache 2.0) Google OAuth Client 1.22.0 (com.google.oauth-client:google-oauth-client:1.22.0 - https://github.com/google/google-oauth-java-client) + (Apache 2.0) Google API Datastore Protobuf v1beta2-rev1-4.0.0 (com.google.apis:google-api-services-datastore-protobuf:v1beta2-rev1-4.0.0 - https://github.com/googleapis/googleapis) + (Apache 2.0) Google Datastore v1 Protobuf Client 1.1.0 (com.google.cloud.datastore:datastore-v1-proto-client:1.1.0 - https://github.com/GoogleCloudPlatform/google-cloud-datastore) + (Apache 2.0) Google Datastore v1 Protos 1.0.1 
(com.google.cloud.datastore:datastore-v1-protos:1.0.1 - https://github.com/googleapis/googleapis) + (Apache 2.0) Twitter Algebird 0.12.1 (com.twitter:algebird-core:0.12.1 - https://github.com/twitter/algebird) + (Apache 2.0) Breeze 0.12 (org.scalanlp:breeze:0.12 - https://github.com/scalanlp/breeze) + (Apache 2.0) Breeze Macros 0.12 (org.scalanlp:breeze-macros:0.12 - https://github.com/scalanlp/breeze) + (Apache 2.0) Opencsv 2.3 (net.sf.opencsv:opencsv:2.3 - http://opencsv.sourceforge.net/) + (Apache 2.0) Shapeless 2.0.0 (com.chuusai:shapeless:2.0.0 - https://github.com/milessabin/shapeless) + (Apache 2.0) Kantan CSV 0.1.12 (com.nrinaudo:kantan.csv:0.1.12 - https://github.com/nrinaudo/kantan.csv) + (Apache 2.0) Kantan Codecs 0.1.6 (com.nrinaudo:kantan.codecs:0.1.6 - https://github.com/nrinaudo/kantan.csv) + (Apache 2.0) jansi 1.4 (org.fusesource.jansi:jansi:1.4 - https://github.com/fusesource/jansi) + (Apache 2.0) Maven Plugin API 3.0 (org.apache.maven:maven-plugin-api:3.0 - https://github.com/apache/maven) + (Apache 2.0) Maven Artifact 3.0 (org.apache.maven:maven-artifact:3.0 - https://github.com/apache/maven) + (Apache 2.0) Maven Model Builder 3.0.3 (org.apache.maven:maven-model-builder:3.0.3 - https://github.com/apache/maven) + (Apache 2.0) Maven Repository Metadata 3.0.3 (org.apache.maven:maven-repository-metadata:3.0.3 - https://github.com/apache/maven) + (Apache 2.0) Maven Wagon Provider API 1.0 (org.apache.maven.wagon:wagon-provider-api:1.0 - https://mvnrepository.com/artifact/org.apache.maven.wagon/wagon-provider-api/1.0) + (Apache 2.0) Maven Wagon HTTP Lightweight 1.0 (org.apache.maven.wagon:wagon-http-lightweight:1.0 - https://mvnrepository.com/artifact/org.apache.maven.wagon/wagon-http-lightweight/1.0) + (Apache 2.0) Maven Wagon HTTP 1.0 (org.apache.maven.wagon:wagon-http:1.0 - https://mvnrepository.com/artifact/org.apache.maven.wagon/wagon-http/1.0) + (Apache 2.0) Maven Wagon HTTP Shared 1.0 (org.apache.maven.wagon:wagon-http-shared:1.0 -
https://mvnrepository.com/artifact/org.apache.maven.wagon/wagon-http-shared/1.0) + (Apache 2.0) Commons HTTP Client 3.1 (commons-httpclient:commons-httpclient:3.1 - https://mvnrepository.com/artifact/commons-httpclient/commons-httpclient/3.1) + (Apache 2.0) Scalatest 2.2.4 (org.scalatest:scalatest_2.10:2.2.4 - https://github.com/scalatest/scalatest) ======================================================================== MIT licenses @@ -197,14 +247,13 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (The MIT License) Moment v2.9.0 (https://github.com/moment/moment) - https://github.com/moment/moment/blob/2.9.0/LICENSE (The MIT License) Pikaday v1.3.2 (https://github.com/dbushell/Pikaday) - https://github.com/dbushell/Pikaday/blob/1.3.2/LICENSE (The MIT License) slf4j v1.7.10 (org.slf4j:slf4j-api:jar:1.7.10 - http://www.slf4j.org) - http://www.slf4j.org/license.html + (The MIT License) slf4j v1.7.21 (org.slf4j:slf4j-simple:1.7.21 - http://www.slf4j.org) - http://www.slf4j.org/license.html (The MIT License) slf4j-log4j12 v1.7.10 (org.slf4j:slf4j-log4j12:jar:1.7.10 - http://www.slf4j.org) - http://www.slf4j.org/license.html (The MIT License) bcprov-jdk15on v1.51 (org.bouncycastle:bcprov-jdk15on:jar:1.51 - http://www.bouncycastle.org/java.html) - http://www.bouncycastle.org/licence.html (The MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs) - https://github.com/bryanbraun/anchorjs/blob/master/README.md#license (The MIT License) moment-duration-format v1.3.0 (https://github.com/jsmreese/moment-duration-format) - https://github.com/jsmreese/moment-duration-format/blob/master/LICENSE (The MIT License) github-markdown-css 2.4.0 (https://github.com/sindresorhus/github-markdown-css) - https://github.com/sindresorhus/github-markdown-css/blob/gh-pages/license (The MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt) -The following components are provided under the MIT License. 
- (The MIT License) Objenesis (org.objenesis:objenesis:2.1 - https://github.com/easymock/objenesis) - Copyright (c) 2006-2015 the original author and authors (The MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.16 - http://www.slf4j.org) (The MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.16 - http://www.slf4j.org) @@ -212,6 +261,11 @@ The following components are provided under the MIT License. (The MIT License) minimal-json (com.eclipsesource.minimal-json:minimal-json:0.9.4 - https://github.com/ralfstx/minimal-json) (The MIT License) pyrolite (net.razorvine:pyrolite:4.9) - https://github.com/irmen/Pyrolite/blob/v4.9/LICENSE) (The MIT License) libpam4j (org.kohsuke:libpam4j:1.8 https://github.com/kohsuke/libpam4j/blob/master/src/main/java/org/jvnet/libpam/PAM.java) + (The MIT License) Spire 0.7.4 (org.spire-math:spire:0.7.4 - https://github.com/non/spire) + (The MIT License) Spire Macros 0.7.4 (org.spire-math:spire-macros:0.7.4 - https://github.com/non/spire) + (The MIT License) Java String Similarity 0.12 (info.debatty:java-string-similarity:0.12 - https://github.com/tdebatty/java-string-similarity) + (The MIT License) Java LSH 0.10 (info.debatty:java-lsh:0.10 - https://github.com/tdebatty/java-LSH) + (The MIT License) JSoup 1.6.1 (org.jsoup:jsoup:1.6.1 - https://github.com/jhy/jsoup/) ======================================================================== BSD-style licenses @@ -242,8 +296,10 @@ The text of each license is also included at licenses/LICENSE-[project]-[version (BSD 3-Clause) io.grpc:grpc-protobuf-lite (io.grpc:grpc-protobuf-lite:0.14.1 - https://github.com/grpc/grpc-java) (BSD 3-Clause) io.grpc:grpc-protobuf-nano (io.grpc:grpc-protobuf-nano:0.14.1 - https://github.com/grpc/grpc-java) (BSD 3-Clause) io.grpc:grpc-stub (io.grpc:grpc-stub:0.14.1 - https://github.com/grpc/grpc-java) - - + (BSD-3-Clause) Kryo 3.0.3 (com.esotericsoftware:kryo-shaded:3.0.3 - https://github.com/EsotericSoftware/kryo) + 
(BSD-3-Clause) Minilog 1.3.0 (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) + + The following components are provided under the BSD-style License. (New BSD License) JGit (org.eclipse.jgit:org.eclipse.jgit:jar:4.1.1.201511131810-r - https://eclipse.org/jgit/) @@ -275,6 +331,10 @@ The following components are provided under the BSD-style License. (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) + (BSD-3-Clause) Scalamacros Paradise 2.1.0 (org.scalamacros:paradise6:2.1.0 - https://github.com/scalamacros/paradise) + (BSD-3-Clause) Scalamacros Quasiquotes 2.1.0 (org.scalamacros:quasiquotes:2.1.0 - https://mvnrepository.com/artifact/org.scalamacros/quasiquotes_2.10/2.1.0) + (BSD-2-Clause) JUnit Interface 0.11 (com.novocode:junit-interface:0.11 - https://github.com/sbt/junit-interface) + (BSD-3-Clause) SBT Test Interface (org.scala-sbt:test-interface:1.0 - https://github.com/sbt/test-interface) ======================================================================== CDDL license @@ -291,11 +351,11 @@ The following components are provided under the CDDL License. 
(CDDL 1.1) jersey-json (com.sun.jersey:jersey-json:1.9 - https://jersey.java.net/jersey-json/) (CDDL 1.1) jersey-server (com.sun.jersey:jersey-server:1.9 - https://jersey.java.net/jersey-server/) (CDDL 1.1) jersey-guice (com.sun.jersey.contribs:jersey-guice:1.9 - https://jersey.java.net/jersey-contribs/jersey-guice/) - (CDDL 1.1) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/) + (CDDL 1.1) JAXB RI (com.sun.xml.bind:jaxb-impl:2.2.3-1 - http://jaxb.java.net/) (CDDL 1.0) Java Servlet API (javax.servlet:javax.servlet-api:3.1.0 - http://servlet-spec.java.net) (CDDL 1.1) (GPL2 w/ CPE) JAXB API bundle for GlassFish V3 (javax.xml.bind:jaxb-api:2.2.2 - https://jaxb.dev.java.net/) (CDDL 1.0) (GNU General Public Library) Streaming API for XML (javax.xml.stream:stax-api:1.0-2 - no url defined) - + ======================================================================== EPL license @@ -306,6 +366,10 @@ The following components are provided under the EPL License. (EPL 1.0) Aether (org.sonatype.aether - http://www.eclipse.org/aether/) (EPL 1.0) JDT Annotations For Enhanced Null Analysis (org.eclipse.jdt:org.eclipse.jdt.annotation:1.1.0 - https://repo.eclipse.org/content/repositories/eclipse-releases/org/eclipse/jdt/org.eclipse.jdt.annotation) (EPL 1.0) JRuby (org.jruby.jruby-complete:v1.6.8 - http://www.jruby.org/) + (EPL 1.0) Sisu Inject Plexus 2.2.2 (org.sonatype.sisu:sisu-inject-plexus:2.2.2 - https://github.com/sonatype/sisu) + (EPL 1.0) Sisu Inject Bean 2.2.2 (org.sonatype.sisu:sisu-inject-bean:2.2.2 - https://github.com/sonatype/sisu) + (EPL 1.0) Sisu Inject Guice (org.sonatype.sisu:sisu-inject-guice:no_aop - https://github.com/sonatype/sisu) + (EPL 1.0) JUnit 4.11 (junit:junit:4.11 - https://github.com/junit-team/junit4) @@ -342,3 +406,8 @@ Creative Commons CC0 (http://creativecommons.org/publicdomain/zero/1.0/) (Public Domain, per Creative Commons CC0) HdrHistogram (org.hdrhistogram:HdrHistogram:2.1.6 - http://hdrhistogram.github.io/HdrHistogram/)
(Public Domain) XZ for Java (org.tukaani:xz:1.0 - http://tukaani.org/xz/java.html) (Public Domain) AOP alliance (aopalliance:aopalliance:1.0 - http://aopalliance.sourceforge.net) + +======================================================================== +Multiple licenses +======================================================================== + (LGPLv2) (GPLv2) (MPL 1.1) Jtransforms (com.github.rwl:jtransforms:2.4.0 - https://sourceforge.net/projects/jtransforms/) diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-lsh-0.10 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-lsh-0.10 new file mode 100644 index 00000000000..73e15450a75 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-lsh-0.10 @@ -0,0 +1,7 @@ +Copyright 2015 Thibault Debatty. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-string-similarity-0.12 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-string-similarity-0.12 new file mode 100644 index 00000000000..73e15450a75 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-java-string-similarity-0.12 @@ -0,0 +1,7 @@ +Copyright 2015 Thibault Debatty. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-jsoup-1.6.1 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-jsoup-1.6.1 new file mode 100644 index 00000000000..9e15540218f --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-jsoup-1.6.1 @@ -0,0 +1,21 @@ +The MIT License + +© 2009-2016, Jonathan Hedley + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-jtransforms-2.4.0 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-jtransforms-2.4.0 new file mode 100644 index 00000000000..870b10d66e4 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-jtransforms-2.4.0 @@ -0,0 +1,33 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is JTransforms. + * + * The Initial Developer of the Original Code is + * Piotr Wendykier, Emory University. + * Portions created by the Initial Developer are Copyright (C) 2007-2009 + * the Initial Developer. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-junit-interface-0.11 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-junit-interface-0.11 new file mode 100644 index 00000000000..c9ef892077c --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-junit-interface-0.11 @@ -0,0 +1,24 @@ +Copyright (c) 2009-2012, Stefan Zeiger +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-kryo-shaded-3.0.3 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-kryo-shaded-3.0.3 new file mode 100644 index 00000000000..e1cd88478ed --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-kryo-shaded-3.0.3 @@ -0,0 +1,10 @@ +Copyright (c) 2008, Nathan Sweet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-minilog-1.3.0 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-minilog-1.3.0 new file mode 100644 index 00000000000..e1cd88478ed --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-minilog-1.3.0 @@ -0,0 +1,10 @@ +Copyright (c) 2008, Nathan Sweet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-sbt-test-interface-1.0 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-sbt-test-interface-1.0 new file mode 100644 index 00000000000..21655fae147 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-sbt-test-interface-1.0 @@ -0,0 +1,25 @@ + +Copyright (c) 2009, 2010 Josh Cough, Mark Harrah +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-paradise-2.1.0 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-paradise-2.1.0 new file mode 100644 index 00000000000..6c8bb4d95d3 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-paradise-2.1.0 @@ -0,0 +1,27 @@ +Copyright (c) 2013-2015 EPFL + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ * Neither the name of the EPFL nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-quasiquotes-2.1.0 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-quasiquotes-2.1.0 new file mode 100644 index 00000000000..6c8bb4d95d3 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-scalamacros-quasiquotes-2.1.0 @@ -0,0 +1,27 @@ +Copyright (c) 2013-2015 EPFL + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ * Neither the name of the EPFL nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-slf4j-simple-1.7.21 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-slf4j-simple-1.7.21 new file mode 100644 index 00000000000..28ba154fbb0 --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-slf4j-simple-1.7.21 @@ -0,0 +1,21 @@ +Copyright (c) 2004-2013 QOS.ch + All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-0.7.4 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-0.7.4 new file mode 100644 index 00000000000..645cba601fd --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-0.7.4 @@ -0,0 +1,19 @@ +Copyright (c) 2011-2012 Erik Osheim, Tom Switzer + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-macros-0.7.4 b/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-macros-0.7.4 new file mode 100644 index 00000000000..645cba601fd --- /dev/null +++ b/zeppelin-distribution/src/bin_license/licenses/LICENSE-spire-macros-0.7.4 @@ -0,0 +1,19 @@ +Copyright (c) 2011-2012 Erik Osheim, Tom Switzer + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From 8f25f71f2c9bd436a0bd07fac72ed19173477cad Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 7 Oct 2016 12:39:19 -0400 Subject: [PATCH 24/36] Upgrade scio to 0.2.4 --- scio/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scio/pom.xml b/scio/pom.xml index 7cae0a5cbeb..5679890606b 100644 --- a/scio/pom.xml +++ b/scio/pom.xml @@ -34,7 +34,7 @@ Zeppelin Scio support - 0.2.3 + 0.2.4 From 0920fddd9fe3162418b87595cb44da55ae2002ad Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 7 Oct 2016 15:30:21 -0400 Subject: [PATCH 25/36] Remove obsolete deps --- scio/pom.xml | 151 +-------------------------------------------------- 1 file changed, 2 insertions(+), 149 deletions(-) diff --git a/scio/pom.xml b/scio/pom.xml index 5679890606b..cf3700c7f04 100644 --- a/scio/pom.xml +++ b/scio/pom.xml @@ -43,17 +43,6 @@ slf4j-api - - org.slf4j - slf4j-log4j12 - - - - ${project.groupId} - zeppelin-display_${scala.binary.version} - ${project.version} - - ${project.groupId} zeppelin-interpreter @@ -64,6 +53,7 @@ com.spotify scio-repl_${scala.binary.version} ${scio.version} + REPL dependency brings other Scio dependencies @@ -72,126 +62,6 @@ 14.0.1 - - - org.apache.maven - maven-plugin-api - 3.0 - - - org.codehaus.plexus - plexus-utils - - - org.sonatype.sisu - sisu-inject-plexus - - - org.apache.maven - maven-model - - - - - org.sonatype.aether - aether-api - 1.12 - - - org.sonatype.aether - aether-util - 1.12 - - - org.sonatype.aether - aether-impl - 1.12 - - - - org.apache.maven - maven-aether-provider - 3.0.3 - - - org.sonatype.aether - aether-api - - - org.sonatype.aether - aether-spi - - - org.sonatype.aether - aether-util - - - org.sonatype.aether - aether-impl - - - org.codehaus.plexus - plexus-utils - - - - - - org.sonatype.aether - aether-connector-file - 1.12 - - - - org.sonatype.aether - aether-connector-wagon - 1.12 - - - org.apache.maven.wagon - wagon-provider-api - - - - - - org.apache.maven.wagon - wagon-provider-api - 1.0 - - - 
org.codehaus.plexus - plexus-utils - - - - - - org.apache.maven.wagon - wagon-http-lightweight - 1.0 - - - org.apache.maven.wagon - wagon-http-shared - - - - - - org.apache.maven.wagon - wagon-http - 1.0 - - - - - - org.apache.commons - commons-exec - 1.3 - - org.scala-lang scala-library @@ -210,17 +80,6 @@ ${scala.version} - - commons-lang - commons-lang - - - - org.apache.commons - commons-compress - 1.9 - - org.scalatest @@ -235,13 +94,6 @@ test - - com.spotify - scio-test_${scala.binary.version} - ${scio.version} - test - - @@ -383,6 +235,7 @@ 1.7 + From 327273ea13391bd062d3b404feb3cc10f1bc7082 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Fri, 7 Oct 2016 19:36:46 -0400 Subject: [PATCH 26/36] Add Zeppelin custom ContextAndArgs To support Dataflow service execution, which requires REPL state dump in jar, we add custom ContextAndArgs. --- scio/pom.xml | 1 - .../apache/zeppelin/scio/ContextAndArgs.scala | 41 +++++++++++++++++++ .../zeppelin/scio/ScioInterpreter.scala | 6 ++- 3 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 scio/src/main/scala/org/apache/zeppelin/scio/ContextAndArgs.scala diff --git a/scio/pom.xml b/scio/pom.xml index cf3700c7f04..27f5433ee15 100644 --- a/scio/pom.xml +++ b/scio/pom.xml @@ -53,7 +53,6 @@ com.spotify scio-repl_${scala.binary.version} ${scio.version} - REPL dependency brings other Scio dependencies diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ContextAndArgs.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ContextAndArgs.scala new file mode 100644 index 00000000000..cb1b390be4f --- /dev/null +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ContextAndArgs.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.scio + +import com.google.cloud.dataflow.sdk.options.PipelineOptions +import com.spotify.scio.repl.ReplScioContext +import com.spotify.scio.{Args, ScioContext} + +/** + * Convenience object for creating [[com.spotify.scio.ScioContext]] and [[com.spotify.scio.Args]]. + */ +object ContextAndArgs { + def apply(argz: Array[String]): (ScioContext, Args) = { + val (dfOpts, args) = ScioContext.parseArguments[PipelineOptions](argz) + + val nextReplJar = this + .getClass + .getClassLoader + .asInstanceOf[{def getNextReplCodeJarPath: String}].getNextReplCodeJarPath + + val sc = new ReplScioContext(dfOpts, List(nextReplJar)) + sc.setName("sciozeppelin") + + (sc, args) + } +} diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 6f0f1947175..514ba3b1703 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -55,15 +55,16 @@ import scala.tools.nsc.util.ClassPath class ScioInterpreter(property: Properties) extends Interpreter(property) { private val logger = LoggerFactory.getLogger(classOf[ScioInterpreter]) - private var REPL: ScioILoop = null + private var REPL: ScioILoop = _ val innerOut = new InterpreterOutputStream(logger) override def open(): Unit = { - val argz: 
List[String] = Option(getProperty("zeppelin.scio.argz")) + val argz = Option(getProperty("zeppelin.scio.argz")) .getOrElse(s"--runner=${classOf[InProcessPipelineRunner].getSimpleName}") .split(" ") .map(_.trim) + .filter(_.nonEmpty) .toList // Process command line arguments into a settings object, and use that to start the REPL. @@ -127,6 +128,7 @@ class ScioInterpreter(property: Properties) extends Interpreter(property) { REPL.interpret(s"""val argz = Array("${argz.mkString("\", \"")}")""") REPL.interpret("import org.apache.zeppelin.scio.DisplaySCollectionImplicits._") REPL.interpret("import org.apache.zeppelin.scio.DisplayTapImplicits._") + REPL.interpret("import org.apache.zeppelin.scio.ContextAndArgs") } private def parseAndPartitionArgs(args: List[String]): (List[String], List[String]) = { From e9579d8736d00cb09a48addd07d1e884f0b33d26 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Mon, 10 Oct 2016 23:55:20 -0400 Subject: [PATCH 27/36] Clarify Context sharing + add docs about display helpers --- docs/interpreter/scio.md | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index 01a8576add8..4c0291ea595 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -52,7 +52,7 @@ In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and s ## Using the Scio Interpreter -In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). Context is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a context via standard `ContextAndArgs`. +In a paragraph, use `%scio` to select the **Scio** interpreter. 
You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. ```scala %scio @@ -69,7 +69,7 @@ val (sc, args) = ContextAndArgs(argz) sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() ``` -Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. +If you close Scio context, go ahead and create a new one using `ContextAndArgs`. Please refer to [Scio wiki](https://github.com/spotify/scio/wiki) for more complex examples. You can close Scio context much the same way as in Scio REPL, and use Zeppelin display helpers to synchronously close and display results - read more below. ### Progress @@ -87,6 +87,22 @@ Supported `SCollection` types: * Apache Avro * All Scala's `AnyVal` +#### Helper methods + +There are different helper methods for different objects. You can easily display results from `SCollection`, `Future[Tap]` and `Tap`. + +##### `SCollection` helper + +`SCollection` has `closeAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously close Scio context, once available pull and display results. + +##### `Future[Tap]` helper + +`Future[Tap]` has `waitAndDisplay` Zeppelin helper method for types listed above. Use it to synchronously wait for results, once available pull and display results. + +##### `Tap` helper + +`Tap` has `display` Zeppelin helper method for types listed above. Use it to pull and display results. 
+ #### BigQuery example: ```scala From 3c519f1ae728bb1b5666052a7414d4d50ef9e1bc Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Tue, 11 Oct 2016 00:21:34 -0400 Subject: [PATCH 28/36] Fix doc style --- docs/interpreter/scio.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index 4c0291ea595..e367490a540 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -52,7 +52,7 @@ In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and s ## Using the Scio Interpreter -In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. +In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. ```scala %scio @@ -103,6 +103,8 @@ There are different helper methods for different objects. You can easily display `Tap` has `display` Zeppelin helper method for types listed above. Use it to pull and display results. 
+### Examples + #### BigQuery example: ```scala From 1e30f76d6e7cde35cd17dd3d86ceb5d68e19d260 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 13 Oct 2016 10:09:44 -0400 Subject: [PATCH 29/36] Simplify SCollection implicits --- .../scio/DisplaySCollectionImplicits.scala | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala b/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala index f7421dc41b7..566e106c578 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/DisplaySCollectionImplicits.scala @@ -44,8 +44,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from SCollection. */ def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayAnyVal(it, printer) + DisplayTapImplicits.ZeppelinTap(materialize(self).waitForResult()).display(printer) } } @@ -56,8 +55,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from SCollection. */ def closeAndDisplay(printer: (T) => String = (e: T) => e.toString): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayString(it, printer) + DisplayTapImplicits.ZeppelinStringTap(materialize(self).waitForResult()).display(printer) } } @@ -67,8 +65,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from KV SCollection. 
*/ def closeAndDisplay(): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayKV(it) + DisplayTapImplicits.ZeppelinKVTap(materialize(self).waitForResult()).display() } } @@ -79,8 +76,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from Product like SCollection */ def closeAndDisplay(): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayProduct(it) + DisplayTapImplicits.ZeppelinProductTap(materialize(self).waitForResult()).display() } } @@ -91,8 +87,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from Avro like SCollection */ def closeAndDisplay(schema: Schema = null): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayAvro(it, schema) + DisplayTapImplicits.ZeppelinAvroTap(materialize(self).waitForResult()).display(schema) } } @@ -103,8 +98,7 @@ object DisplaySCollectionImplicits { /** Convenience method to close the current [[com.spotify.scio.ScioContext]] * and display elements from TableRow like SCollection */ def closeAndDisplay(schema: TableSchema): Unit = { - val it = materialize(self).waitForResult().value - DisplayHelpers.displayBQTableRow(it, schema) + DisplayTapImplicits.ZeppelinBQTableTap(materialize(self).waitForResult()).display(schema) } } From b961791b6c4f5f47c33124f269104fc9c9a790f3 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 13 Oct 2016 10:23:45 -0400 Subject: [PATCH 30/36] Check the message content --- .../java/org/apache/zeppelin/scio/ScioInterpreterTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java index 00a036dcacd..bd338865539 100644 --- 
a/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java +++ b/scio/src/test/java/org/apache/zeppelin/scio/ScioInterpreterTest.java @@ -79,14 +79,14 @@ public void testBasicSuccess() { public void testBasicSyntaxError() { InterpreterResult error = repl.interpret("val a:Int = 'ds'", context); assertEquals(InterpreterResult.Code.ERROR, error.code()); - assertTrue(error.message().length() > 0); + assertEquals("Interpreter error", error.message()); } @Test public void testBasicIncomplete() { InterpreterResult incomplete = repl.interpret("val a = \"\"\"", context); assertEquals(InterpreterResult.Code.INCOMPLETE, incomplete.code()); - assertTrue(incomplete.message().length() > 0); + assertEquals("Incomplete expression", incomplete.message()); } @Test From cd79fc8360c47d1d29c4ccf36c96a7b5c77fb863 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 20 Oct 2016 19:25:17 -0400 Subject: [PATCH 31/36] Add .bigquery cache to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6352f7b88d0..1d37d397676 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,9 @@ zeppelin-web/bower_components .Rhistory /R/ +# scio +.bigquery/ + # project level /logs/ /run/ From 49cf0eb482395ce59f2c387c46fd85864d92522b Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Thu, 20 Oct 2016 19:25:31 -0400 Subject: [PATCH 32/36] Add scio to beam group --- beam/pom.xml | 8 +++++- .../main/resources/interpreter-setting.json | 22 ++++++++++++++++ docs/interpreter/scio.md | 16 ++++++------ .../main/resources/interpreter-setting.json | 25 ------------------- .../zeppelin/scio/ScioInterpreter.scala | 2 +- 5 files changed, 38 insertions(+), 35 deletions(-) delete mode 100644 scio/src/main/resources/interpreter-setting.json diff --git a/beam/pom.xml b/beam/pom.xml index b0f165647b6..3d36332f13e 100644 --- a/beam/pom.xml +++ b/beam/pom.xml @@ -102,7 +102,13 @@ - + + + org.apache.zeppelin + zeppelin-scio_${scala.binary.version} + 
${project.version} + + org.apache.hadoop hadoop-hdfs diff --git a/beam/src/main/resources/interpreter-setting.json b/beam/src/main/resources/interpreter-setting.json index 189e08b5d82..428b76ddd48 100644 --- a/beam/src/main/resources/interpreter-setting.json +++ b/beam/src/main/resources/interpreter-setting.json @@ -9,5 +9,27 @@ "editor": { "editOnDblClick": false } + }, + { + "group": "beam", + "name": "scio", + "className": "org.apache.zeppelin.scio.ScioInterpreter", + "properties": { + "zeppelin.scio.argz": { + "envName": "ZEPPELIN_SCIO_ARGZ", + "propertyName": "zeppelin.scio.argz", + "defaultValue": "--runner=InProcessPipelineRunner", + "description": "Scio interpreter wide arguments" + }, + "zeppelin.scio.maxResult": { + "envName": "ZEPPELIN_SCIO_MAXRESULT", + "propertyName": "zeppelin.scio.maxResult", + "defaultValue": "1000", + "description": "Max number of SCollection results to display." + } + }, + "editor": { + "language": "scala" + } } ] diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index e367490a540..f908f73d0cb 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -48,14 +48,14 @@ Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPl ## Enabling the Scio Interpreter -In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **scio**. +In a notebook, to enable the **Scio** interpreter, click the **Gear** icon and select **beam** (**beam.scio**). ## Using the Scio Interpreter -In a paragraph, use `%scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. 
+In a paragraph, use `$beam.scio` to select the **Scio** interpreter. You can use it much the same way as vanilla Scala REPL and [Scio REPL](https://github.com/spotify/scio/wiki/Scio-REPL). State (like variables, imports, execution etc) is shared among all *Scio* paragraphs. There is a special variable **argz** which holds arguments from Scio interpreter settings. The easiest way to proceed is to create a Scio context via standard `ContextAndArgs`. ```scala -%scio +$beam.scio val (sc, args) = ContextAndArgs(argz) ``` @@ -64,7 +64,7 @@ Use `sc` context the way you would in regular pipeline/REPL. Example: ```scala -%scio +$beam.scio val (sc, args) = ContextAndArgs(argz) sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() ``` @@ -108,7 +108,7 @@ There are different helper methods for different objects. You can easily display #### BigQuery example: ```scala -%scio +$beam.scio @BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays |FROM [bigquery-samples:airline_ontime_data.flights] |group by departure_airport @@ -122,7 +122,7 @@ sc.bigQuerySelect(Flights.query).closeAndDisplay(Flights.schema) #### BigQuery typed example: ```scala -%scio +$beam.scio @BigQueryType.fromQuery("""|SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays |FROM [bigquery-samples:airline_ontime_data.flights] |group by departure_airport @@ -136,7 +136,7 @@ sc.typedBigQuery[Flights]().flatMap(_.no_of_delays).mean.closeAndDisplay() #### Avro example: ```scala -%scio +$beam.scio import com.spotify.data.ExampleAvro val (sc, args) = ContextAndArgs(argz) @@ -146,7 +146,7 @@ sc.avroFile[ExampleAvro]("gs:///tmp/my.avro").take(10).closeAndDisplay() #### Avro example with a view schema: ```scala -%scio +$beam.scio import com.spotify.data.ExampleAvro import org.apache.avro.Schema diff --git a/scio/src/main/resources/interpreter-setting.json 
b/scio/src/main/resources/interpreter-setting.json deleted file mode 100644 index 80b60772388..00000000000 --- a/scio/src/main/resources/interpreter-setting.json +++ /dev/null @@ -1,25 +0,0 @@ -[ - { - "group": "scio", - "name": "scio", - "className": "org.apache.zeppelin.scio.ScioInterpreter", - "defaultInterpreter": true, - "properties": { - "zeppelin.scio.argz": { - "envName": "ZEPPELIN_SCIO_ARGZ", - "propertyName": "zeppelin.scio.argz", - "defaultValue": "--runner=InProcessPipelineRunner", - "description": "Scio interpreter wide arguments" - }, - "zeppelin.scio.maxResult": { - "envName": "ZEPPELIN_SCIO_MAXRESULT", - "propertyName": "zeppelin.scio.maxResult", - "defaultValue": "1000", - "description": "Max number of SCollection results to display." - } - }, - "editor": { - "language": "scala" - } - } -] diff --git a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala index 514ba3b1703..00d011f0129 100644 --- a/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala +++ b/scio/src/main/scala/org/apache/zeppelin/scio/ScioInterpreter.scala @@ -45,7 +45,7 @@ import scala.tools.nsc.util.ClassPath *

    * How to use:
    * {@code - * %scio + * %beam.scio * val (sc, args) = ContextAndArgs(argz) * sc.parallelize(Seq("foo", "foo", "bar")).countByValue.closeAndDisplay() * } From 508705f9ad2192616680bbf1cfed99e51d49594b Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Mon, 31 Oct 2016 14:26:24 +0100 Subject: [PATCH 33/36] Remove duplicates --- flink/pom.xml | 7 ------- spark/pom.xml | 28 +++++++++------------------- zeppelin-zengine/pom.xml | 6 ------ 3 files changed, 9 insertions(+), 32 deletions(-) diff --git a/flink/pom.xml b/flink/pom.xml index 5ba7e33dd7e..1885a1f4148 100644 --- a/flink/pom.xml +++ b/flink/pom.xml @@ -313,13 +313,6 @@ runtime - - - - - org.apache.maven.plugins - maven-dependency-plugin - copy-artifact package diff --git a/spark/pom.xml b/spark/pom.xml index 46a46f1adf7..9e1549d7b2c 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -320,6 +320,9 @@ 1 false -Xmx1024m -XX:MaxPermSize=256m + + **/SparkRInterpreterTest.java + @@ -389,6 +392,12 @@ org.scala-tools maven-scala-plugin 2.15.2 + + + **/ZeppelinR.scala + **/SparkRBackend.scala + + compile @@ -427,25 +436,6 @@ - - org.scala-tools - maven-scala-plugin - - - **/ZeppelinR.scala - **/SparkRBackend.scala - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/SparkRInterpreterTest.java - - - diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml index 84fb6248224..09a7bd678d0 100644 --- a/zeppelin-zengine/pom.xml +++ b/zeppelin-zengine/pom.xml @@ -168,12 +168,6 @@ 5.3.1 - - junit - junit - test - - org.reflections reflections From d8de7c8acf4eb6367787a22fe09a00a562a86aec Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Mon, 31 Oct 2016 14:26:36 +0100 Subject: [PATCH 34/36] Remove optional flink deps from Beam Flink dependency makes it impossible to compile scio for scala 2.11 given that there are not 2.11 artifacts for flink runner available yet. 
--- beam/pom.xml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/beam/pom.xml b/beam/pom.xml index 3d36332f13e..dc31594187e 100644 --- a/beam/pom.xml +++ b/beam/pom.xml @@ -181,34 +181,6 @@ beam-runners-direct-java ${beam.beam.version} - - - org.apache.beam - beam-runners-flink_2.10 - ${beam.beam.version} - - - slf4j-log4j12 - org.slf4j - - - netty-all - io.netty - - - - - - org.apache.beam - beam-runners-flink_2.10-examples - ${beam.beam.version} - - - slf4j-log4j12 - org.slf4j - - - javax.servlet From 7e6fdec9b1aa25b5f9724aa2f134578a0d4cea6f Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Sat, 5 Nov 2016 23:31:24 +0100 Subject: [PATCH 35/36] Fix indentation --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c9abfbd1522..80b037394c1 100644 --- a/pom.xml +++ b/pom.xml @@ -591,7 +591,7 @@ - + scio scio From d6fbc4ee4427727d557f3fb5bca9de0f722a8ff1 Mon Sep 17 00:00:00 2001 From: Rafal Wojdyla Date: Sat, 5 Nov 2016 23:36:57 +0100 Subject: [PATCH 36/36] Add runner doc links --- docs/interpreter/scio.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/interpreter/scio.md b/docs/interpreter/scio.md index f908f73d0cb..83a48d06f3b 100644 --- a/docs/interpreter/scio.md +++ b/docs/interpreter/scio.md @@ -36,7 +36,7 @@ Scio is a Scala DSL for [Google Cloud Dataflow](https://github.com/GoogleCloudPl zeppelin.scio.argz --runner=InProcessPipelineRunner - Scio interpreter wide arguments + Scio interpreter wide arguments. Documentation: https://github.com/spotify/scio/wiki#options and https://cloud.google.com/dataflow/pipelines/specifying-exec-params zeppelin.scio.maxResult