diff --git a/.gitignore b/.gitignore
index f110044b..a101a857 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ project/.gnupg/local*
 project/target
 target/
 .history
+.bsp
 
 # Java
 *.class
diff --git a/.travis.yml b/.travis.yml
index 6573eac2..e154407d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,29 +9,22 @@ addons:
     packages:
       - shellcheck
 
-before_install:
-  - git fetch --tags
-  - docker pull exasol/docker-db:7.0.2
-  - docker pull exasol/docker-db:6.2.9-d1
-  - docker network create -d bridge --subnet 192.168.0.0/24 --gateway 192.168.0.1 dockernet
-
-matrix:
-  include:
-    - jdk: openjdk8
-      scala: 2.11.12
-      env: SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="6.2.9-d1"
+jdk:
+  - openjdk11
 
-    - jdk: openjdk8
-      scala: 2.11.12
-      env: SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="7.0.2"
+scala:
+  - 2.12.12
 
-    - jdk: openjdk8
-      scala: 2.12.12
-      env: SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="6.2.9-d1"
+env:
+  - SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="6.2.12-d1"
+  - SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="7.0.4"
+  - SPARK_VERSION="3.0.1" EXASOL_DOCKER_VERSION="6.2.12-d1"
+  - SPARK_VERSION="3.0.1" EXASOL_DOCKER_VERSION="7.0.4"
 
-    - jdk: openjdk8
-      scala: 2.12.12
-      env: SPARK_VERSION="2.4.5" EXASOL_DOCKER_VERSION="7.0.2"
+before_install:
+  - git fetch --tags
+  - docker pull "exasol/docker-db:$EXASOL_DOCKER_VERSION"
+  - docker network create -d bridge --subnet 192.168.0.0/24 --gateway 192.168.0.1 dockernet
 
 script:
   - travis_wait 30 ./scripts/ci.sh
diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md
index 01de10a3..9ce0e825 100644
--- a/doc/changes/changelog.md
+++ b/doc/changes/changelog.md
@@ -1,5 +1,6 @@
 # Releases
 
+* [1.0.0](changes_1.0.0.md)
 * [0.3.2](changes_0.3.2.md)
 * [0.3.1](changes_0.3.1.md)
 * [0.3.0](changes_0.3.0.md)
diff --git a/doc/changes/changes_1.0.0.md b/doc/changes/changes_1.0.0.md
new file mode 100644
index 00000000..072f64ee
--- /dev/null
+++ b/doc/changes/changes_1.0.0.md
@@ -0,0 +1,33 @@
+# Spark Exasol Connector 1.0.0, released 2020-12-DD
+
+## Features / Improvements
+
+
+## Documentation
+
+* #85: Updated documentation with configuration for the Databricks cluster (PR #86)
+
+## Dependency Updates
+
+### Runtime Dependency Updates
+
+* Updated to `com.exasol:exasol-jdbc:7.0.4` (was `7.0.0`)
+* Updated to `org.apache.spark:spark-core:3.0.1` (was `2.4.5`)
+* Updated to `org.apache.spark:spark-sql:3.0.1` (was `2.4.5`)
+
+### Test Dependency Updates
+
+* Updated to `org.scalatest:scalatest:3.2.3` (was `3.2.2`)
+* Updated to `org.testcontainers:jdbc:1.15.0` (was `1.14.3`)
+* Updated to `com.holdenkarau:spark-testing-base:3.0.1_1.0.0` (was `2.4.5_0.14.0`)
+* Updated to `org.mockito:mockito-core:3.6.28` (was `3.5.13`)
+* Updated to `com.dimafeng:testcontainers-scala:0.38.7` (was `0.38.4`)
+
+### Plugin Updates
+
+* Updated to `sbt.version:1.4.4` (was `1.3.13`)
+* Updated to `org.wartremover:sbt-wartremover:2.4.13` (was `2.4.10`)
+* Updated to `org.wartremover:sbt-wartremover-contrib:1.3.11` (was `1.3.8`)
+* Updated to `com.jsuereth:sbt-pgp:2.0.2` (was `2.0.1`)
+* Updated to `org.xerial.sbt:sbt-sonatype:3.9.5` (was `3.9.4`)
+* Removed `io.get-coursier:sbt-coursier`
diff --git a/doc/images/databricks-deployment.png b/doc/images/databricks-deployment.png
new file mode 100644
index 00000000..463a9748
Binary files /dev/null and b/doc/images/databricks-deployment.png differ
diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md
index 5d5efaa2..e7302600 100644
--- a/doc/user_guide/user_guide.md
+++ b/doc/user_guide/user_guide.md
@@ -86,6 +86,29 @@
libraryDependencies += "com.exasol" % "spark-connector" %% "" Similar to the Java dependency, we add the resolver to the Exasol Artifactory so that the Exasol JDBC driver can be found. +### Spark Exasol Connector as Databricks Cluster Dependency + +Similar to using maven, you should provide maven artifact coordinates to the +[Databricks Workspace Library][databricks-install]. + +[databricks-install]: https://docs.databricks.com/libraries/workspace-libraries.html#maven-libraries + +databricks-deployment + +Go to your cluster, then to `Libraries`, and click `Install New`: + +- Select Maven as a Library Source. +- In the Coordinate field, enter artifact coordinates + `com.exasol:spark-connector_2.12:`. Please notice that we use the + Scala version 2.12, change it to 2.11 if your Databricks Runtime version + requires it. +- In the Repository field, enter the Exasol Artifactory + `https://maven.exasol.com/artifactory/exasol-releases`. +- Click Install. + +Please change the `` to one of the latest Spark Exasol Connector +releases. + ### Spark Exasol Connector With Spark Shell You can also integrate the Spark Exasol Connector to the Spark Shell. Provide diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 1e89cf62..381c7b4f 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -6,19 +6,19 @@ import sbt._ object Dependencies { // Versions - private val DefaultSparkVersion = "2.4.5" - private val ExasolJdbcVersion = "7.0.0" + private val DefaultSparkVersion = "3.0.1" + private val ExasolJdbcVersion = "7.0.4" - private val ScalaTestVersion = "3.2.2" + private val ScalaTestVersion = "3.2.3" private val ScalaTestMockitoVersion = "1.0.0-M2" - private val MockitoVersion = "3.5.13" - private val ContainersJdbcVersion = "1.14.3" - private val ContainersScalaVersion = "0.38.4" + private val MockitoVersion = "3.6.28" + private val ContainersJdbcVersion = "1.15.0" + private val ContainersScalaVersion = "0.38.7" private val sparkCurrentVersion = sys.env.getOrElse("SPARK_VERSION", DefaultSparkVersion) - private val SparkTestingBaseVersion = s"${sparkCurrentVersion}_0.14.0" + private val SparkTestingBaseVersion = s"${sparkCurrentVersion}_1.0.0" val Resolvers: Seq[Resolver] = Seq( "Exasol Releases" at "https://maven.exasol.com/artifactory/exasol-releases" diff --git a/project/Settings.scala b/project/Settings.scala index 01cdca43..4d1e9f7a 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -19,6 +19,7 @@ object Settings { buildSettings(scalaVersion) ++ miscSettings ++ assemblySettings ++ + apiDocSettings ++ scalaStyleSettings ++ Publishing.publishSettings() @@ -70,6 +71,31 @@ object Settings { } ) + def apiDocSettings(): Seq[Setting[_]] = Seq( + autoAPIMappings := true, + apiMappings ++= scalaInstance.value.libraryJars.collect { + case file if file.getName.startsWith("scala-library") && file.getName.endsWith(".jar") => + file -> url(s"http://www.scala-lang.org/api/${scalaVersion.value}/") + }.toMap ++ + // Since Java 9+ introduced modules, API links changed, update these + // links based on used Java modules. + Map( + file("/modules/java.sql") -> url( + "https://docs.oracle.com/en/java/javase/11/docs/api/java.sql" + ) + ), + // Override doc task in 2.11.x versions since linking external Java + // 11+ classes does not work. 
+    (Compile / doc) := Def.taskDyn {
+      val docTaskValue = (Compile / doc).taskValue
+      if (scalaBinaryVersion.value == "2.11") {
+        (Compile / doc / target).toTask
+      } else {
+        Def.task(docTaskValue.value)
+      }
+    }.value
+  )
+
   def scalaStyleSettings(): Seq[Setting[_]] = {
     // Creates a Scalastyle task that runs with tests
     lazy val mainScalastyle = taskKey[Unit]("mainScalastyle")
diff --git a/project/build.properties b/project/build.properties
index 0837f7a1..7de0a938 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version=1.3.13
+sbt.version=1.4.4
diff --git a/project/plugins.sbt b/project/plugins.sbt
index f21b4c4e..92bee8b7 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,14 +1,10 @@
-// Adds a pure Scala artifact fetcher `coursier`
-// https://github.com/coursier/coursier
-addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.3")
-
 // Adds `wartremover`, a flexible Scala code linting tool
 // http://github.com/puffnfresh/wartremover
-addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.10")
+addSbtPlugin("org.wartremover" % "sbt-wartremover" % "2.4.13")
 
 // Adds Contrib Warts
 // http://github.com/wartremover/wartremover-contrib/
-addSbtPlugin("org.wartremover" % "sbt-wartremover-contrib" % "1.3.8")
+addSbtPlugin("org.wartremover" % "sbt-wartremover-contrib" % "1.3.11")
 
 // Adds an `assembly` task to create a fat JAR with all of its dependencies
 // https://github.com/sbt/sbt-assembly
@@ -45,11 +41,11 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2")
 
 // Adds `sonatype` release tasks
 // https://github.com/xerial/sbt-sonatype
-addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.4")
+addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.5")
 
 // Adds a `gnu-pgp` plugin
 // https://github.com/sbt/sbt-pgp
-addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1")
+addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.2")
 
 // Adds a `git` plugin
 // https://github.com/sbt/sbt-git
diff --git a/sbtx b/sbtx
index 11a73fbc..004a6170 100755
--- a/sbtx
+++ b/sbtx
@@ -34,10 +34,10 @@ set -o pipefail
 
-declare -r sbt_release_version="1.3.13"
-declare -r sbt_unreleased_version="1.4.0-M1"
+declare -r sbt_release_version="1.4.4"
+declare -r sbt_unreleased_version="1.4.4"
 
-declare -r latest_213="2.13.3"
+declare -r latest_213="2.13.4"
 declare -r latest_212="2.12.12"
 declare -r latest_211="2.11.12"
 declare -r latest_210="2.10.7"
@@ -471,7 +471,7 @@ process_args() {
     -trace) require_arg integer "$1" "$2" && trace_level="$2" && shift 2 ;;
     -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-    -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
+    -no-colors) addJava "-Dsbt.log.noformat=true" && addJava "-Dsbt.color=false" && shift ;;
     -sbt-create) sbt_create=true && shift ;;
     -sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;;
     -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
diff --git a/scripts/ci.sh b/scripts/ci.sh
index e29a6fbb..da654931 100755
--- a/scripts/ci.sh
+++ b/scripts/ci.sh
@@ -10,7 +10,7 @@ BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/.. && pwd )"
&& pwd )" cd "$BASE_DIR" MAIN_SCALA_VERSION=2.12.12 -MAIN_SPARK_VERSION=2.4.5 +MAIN_SPARK_VERSION=3.0.1 if [[ -z "${TRAVIS_SCALA_VERSION:-}" ]]; then echo "Environment variable TRAVIS_SCALA_VERSION is not set" @@ -104,7 +104,7 @@ run_dependency_info () { ./sbtx \ -Dspark.currentVersion=$SPARK_VERSION \ ++$TRAVIS_SCALA_VERSION \ - dependencyUpdates pluginUpdates coursierDependencyTree + dependencyUpdates pluginUpdates dependencyTree } run_shell_check () { diff --git a/src/it/java/org/testcontainers/containers/ExasolDockerContainer.java b/src/it/java/org/testcontainers/containers/ExasolDockerContainer.java index fd56a073..601914ea 100644 --- a/src/it/java/org/testcontainers/containers/ExasolDockerContainer.java +++ b/src/it/java/org/testcontainers/containers/ExasolDockerContainer.java @@ -10,7 +10,7 @@ public class ExasolDockerContainer> extends JdbcDatabaseContainer { - private static final String DEFAULT_EXASOL_VERSION = "7.0.2"; + private static final String DEFAULT_EXASOL_VERSION = "7.0.4"; // wait for 5 minutes to startup private static final Integer EXASOL_STARTUP_TIME = 15 * 60; private String username = "sys"; diff --git a/src/main/scala/com/exasol/spark/rdd/ExasolRDD.scala b/src/main/scala/com/exasol/spark/rdd/ExasolRDD.scala index 1b3fddf1..11c0954b 100644 --- a/src/main/scala/com/exasol/spark/rdd/ExasolRDD.scala +++ b/src/main/scala/com/exasol/spark/rdd/ExasolRDD.scala @@ -13,11 +13,11 @@ import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.SparkListener import org.apache.spark.scheduler.SparkListenerApplicationEnd import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils import org.apache.spark.sql.types.StructType import com.exasol.jdbc.EXAConnection import com.exasol.jdbc.EXAResultSet -import com.exasol.spark.util.Converter import com.exasol.spark.util.ExasolConnectionManager /** @@ -155,7 +155,7 @@ class ExasolRDD( conn = manager.subConnection(partition.connectionUrl) resultSet = conn.DescribeResult(subHandle) - Converter.resultSetToRows(resultSet, querySchema) + JdbcUtils.resultSetToRows(resultSet, querySchema) } // scalastyle:on null return diff --git a/src/main/scala/com/exasol/spark/util/Converter.scala b/src/main/scala/com/exasol/spark/util/Converter.scala index f4c32356..80286664 100644 --- a/src/main/scala/com/exasol/spark/util/Converter.scala +++ b/src/main/scala/com/exasol/spark/util/Converter.scala @@ -6,7 +6,6 @@ import java.sql.ResultSet import org.apache.spark.internal.Logging import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ @@ -21,16 +20,6 @@ import org.apache.spark.unsafe.types.UTF8String */ object Converter extends Logging { - /** - * Converts a [[java.sql.ResultSet]] into an iterator of - * [[org.apache.spark.sql.Row]]-s. 
- */ - def resultSetToRows(resultSet: ResultSet, schema: StructType): Iterator[Row] = { - val encoder = RowEncoder(schema).resolveAndBind() - val internalRows = resultSetToSparkInternalRows(resultSet, schema) - internalRows.map(encoder.fromRow) - } - @SuppressWarnings(Array("org.wartremover.warts.AsInstanceOf")) def resultSetToSparkInternalRows( resultSet: ResultSet, diff --git a/src/test/scala/com/exasol/spark/util/FiltersSuite.scala b/src/test/scala/com/exasol/spark/util/FiltersSuite.scala index 69560d86..756b6360 100644 --- a/src/test/scala/com/exasol/spark/util/FiltersSuite.scala +++ b/src/test/scala/com/exasol/spark/util/FiltersSuite.scala @@ -41,7 +41,7 @@ class FiltersSuite extends AnyFunSuite with Matchers { | str_col <= '123' |AND int_col > 42 |AND (in_col NOT IN (1,2,3)) - """.stripMargin.lines.mkString(" ").trim + """.stripMargin.replaceAll("\\s+", " ").trim() assert(createWhereClause(testSchema, filters) === expected) } @@ -78,7 +78,7 @@ class FiltersSuite extends AnyFunSuite with Matchers { |AND (str_col LIKE '%inside%') |AND (str_col LIKE 'prefix%') |AND (in_col IN (1,2,3)) - """.stripMargin.lines.mkString(" ").trim + """.stripMargin.replaceAll("\\s+", " ").trim() assert(createWhereClause(testSchema, filters) === expected) } @@ -108,7 +108,7 @@ class FiltersSuite extends AnyFunSuite with Matchers { |AND double_col = 100.0 |AND date_col = date '2018-01-01' |AND datetime_col = timestamp '2018-01-01 00:00:59.123' - """.stripMargin.lines.mkString(" ").trim + """.stripMargin.replaceAll("\\s+", " ").trim() assert(createWhereClause(testSchema, filters) === expected) } @@ -126,7 +126,7 @@ class FiltersSuite extends AnyFunSuite with Matchers { |((str_col = 'abc') OR (int_col = 123)) |AND (((NOT ((int_col IS NULL)))) OR ((str_col IS NOT NULL))) |AND ((str_col = 'xyz') OR (((float_col = 3.14) AND (int_col != 3)))) - """.stripMargin.lines.mkString(" ").trim + """.stripMargin.replaceAll("\\s+", " ").trim() assert(createWhereClause(testSchema, filters) === expected) }