From 4cb217f74e7c1c765f186e13450eeb0c74ed17b4 Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Thu, 16 May 2024 21:48:28 +0800
Subject: [PATCH] Upgrade to Spark 3.5.1, use cloudnativek8s Spark base image,
 resolve vulnerabilities, fix bug in index.js with missing import, add in Java
 options to run with Java 17

---
Review notes (text between the '---' marker and the first diff is ignored by git am):
 * Dockerfile: FROM now references the SPARK_VERSION build arg, which was
   declared (and passed by local-docker-build.sh) but never used.
 * app/build.gradle.kts: exclude(module = "ion-jav") corrected to "ion-java";
   the replacement dependency is still commented out, as in the original.
 * script/run-data-caterer.sh: the DEPLOY_MODE check used the arithmetic
   operator -eq on strings, so the standalone branch ran for every value;
   it now uses ==.
 * local-docker-build.sh: gradle.properties lookups are anchored to the key
   and the image tag is quoted.
 * misc/vulns/check_vulns.sh: scan path is resolved relative to the script.
 * Blank context lines and indentation inside hunks are placed to satisfy the
   hunk line counts; confirm against the target tree before applying.

 Dockerfile                                    |  5 ++--
 README.md                                     |  6 ++++
 app/build.gradle.kts                          | 30 ++++++++++++++++++-
 app/src/main/resources/ui/index.js            |  1 +
 .../script/responses/connections.json         |  0
 .../script/responses/execution_history.json   |  0
 .../resources/script/responses/plans.json     |  0
 .../resources/script/share_js.sh              |  0
 gradle.properties                             |  4 +--
 local-docker-build.sh                         | 20 +++++++++++++
 misc/vulns/check_vulns.sh                     |  7 +++++
 script/run-data-caterer.sh                    | 14 +++++++--
 12 files changed, 77 insertions(+), 10 deletions(-)
 rename app/src/{main => test}/resources/script/responses/connections.json (100%)
 rename app/src/{main => test}/resources/script/responses/execution_history.json (100%)
 rename app/src/{main => test}/resources/script/responses/plans.json (100%)
 rename app/src/{main => test}/resources/script/share_js.sh (100%)
 create mode 100644 local-docker-build.sh
 create mode 100644 misc/vulns/check_vulns.sh

diff --git a/Dockerfile b/Dockerfile
index d834fbff..0291394f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,7 @@
-ARG SPARK_VERSION=3.5.0
-FROM apache/spark:$SPARK_VERSION
+ARG SPARK_VERSION=3.5.1
+FROM cloudnativek8s/spark:${SPARK_VERSION}-b1.0.18
 
 USER root
-RUN groupadd -g 1001 app && useradd -m -u 1001 -g app app
 RUN mkdir -p /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report
 RUN chown -R app:app /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report
 COPY --chown=app:app script /opt/app
diff --git a/README.md b/README.md
index d052ae9a..6af4a74b 100644
--- a/README.md
+++ b/README.md
@@ -191,3 +191,9 @@ jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-windows.cfg"
 # Linux
 jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-linux.cfg"
 ```
+
+##### Java 17 VM Options
+
+```shell
+--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
+```
\ No newline at end of file
diff --git a/app/build.gradle.kts b/app/build.gradle.kts
index 253bd25b..7d0159e3 100644
--- a/app/build.gradle.kts
+++ b/app/build.gradle.kts
@@ -53,9 +53,37 @@ configurations {
 
 dependencies {
     jpackageDep("org.scala-lang:scala-library:$scalaSpecificVersion")
-    jpackageDep("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion")
+    jpackageDep("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") {
+        exclude(group = "com.google.protobuf")
+        exclude(module = "netty-codec-http")
+        exclude(module = "woodstox-core")
+        exclude(module = "nimbus-jose-jwt")
+        exclude(module = "commons-net")
+        exclude(module = "netty-handler")
+        exclude(module = "json-smart")
+        exclude(module = "avro")
+        exclude(module = "commons-compress")
+        exclude(module = "commons-configuration2")
+        exclude(module = "jetty-http")
+        exclude(module = "ion-java")
+    }
     jpackageDep(project(":api"))
 
+    // vulnerabilities in Spark
+    basicImpl("com.google.protobuf:protobuf-java:3.21.7")
+    basicImpl("io.netty:netty-codec-http:4.1.109.Final")
+    basicImpl("com.fasterxml.woodstox:woodstox-core:5.4.0")
+    basicImpl("com.nimbusds:nimbus-jose-jwt:9.37.2")
+    basicImpl("commons-net:commons-net:3.9.0")
+    basicImpl("io.netty:netty-handler:4.1.109.Final")
+    basicImpl("net.minidev:json-smart:2.4.9")
+    basicImpl("org.apache.avro:avro:1.11.3")
+    basicImpl("org.apache.commons:commons-compress:1.26.0")
+    basicImpl("org.apache.commons:commons-configuration2:2.10.1")
+    basicImpl("org.codehaus.jettison:jettison:1.5.4")
+    basicImpl("org.eclipse.jetty:jetty-http:9.4.54.v20240208")
+    //basicImpl("software.amazon.ion:ion-java:1.5.1") //should use: basicImpl("com.amazon.ion:ion-java:1.11.8")
+
     // connectors
     // postgres
     basicImpl("org.postgresql:postgresql:42.6.0")
diff --git a/app/src/main/resources/ui/index.js b/app/src/main/resources/ui/index.js
index 8c79f516..eb1e4601 100644
--- a/app/src/main/resources/ui/index.js
+++ b/app/src/main/resources/ui/index.js
@@ -25,6 +25,7 @@ import {
     dispatchEvent,
     executePlan,
     getDataConnectionsAndAddToSelect,
+    getOverrideConnectionOptionsAsMap,
     manualContainerDetails,
     wait
 } from "./shared.js";
diff --git a/app/src/main/resources/script/responses/connections.json b/app/src/test/resources/script/responses/connections.json
similarity index 100%
rename from app/src/main/resources/script/responses/connections.json
rename to app/src/test/resources/script/responses/connections.json
diff --git a/app/src/main/resources/script/responses/execution_history.json b/app/src/test/resources/script/responses/execution_history.json
similarity index 100%
rename from app/src/main/resources/script/responses/execution_history.json
rename to app/src/test/resources/script/responses/execution_history.json
diff --git a/app/src/main/resources/script/responses/plans.json b/app/src/test/resources/script/responses/plans.json
similarity index 100%
rename from app/src/main/resources/script/responses/plans.json
rename to app/src/test/resources/script/responses/plans.json
diff --git a/app/src/main/resources/script/share_js.sh b/app/src/test/resources/script/share_js.sh
similarity index 100%
rename from app/src/main/resources/script/share_js.sh
rename to app/src/test/resources/script/share_js.sh
diff --git a/gradle.properties b/gradle.properties
index 0149ff95..8f9f191b 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,7 +1,7 @@
 groupId=io.github.data-catering
-version=0.10.1
+version=0.10.2
 
 scalaVersion=2.12
 scalaSpecificVersion=2.12.15
-sparkVersion=3.5.0
+sparkVersion=3.5.1
 sparkMajorVersion=3.5
diff --git a/local-docker-build.sh b/local-docker-build.sh
new file mode 100644
index 00000000..1bb7a203
--- /dev/null
+++ b/local-docker-build.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+version=$(grep '^version=' gradle.properties | cut -d= -f2)
+sparkVersion=$(grep '^sparkVersion=' gradle.properties | cut -d= -f2)
+
+echo "Creating API jar"
+gradle clean :api:shadowJar
+
+echo "Creating data caterer jar, version=$version"
+gradle build shadowJar -x test
+build_app=$?
+if [[ "$build_app" -ne 0 ]] ; then
+  echo "Failed to build app, exiting"
+  exit 1
+fi
+
+docker build \
+  --build-arg "APP_VERSION=$version" \
+  --build-arg "SPARK_VERSION=$sparkVersion" \
+  -t "datacatering/data-caterer-basic:$version" .
diff --git a/misc/vulns/check_vulns.sh b/misc/vulns/check_vulns.sh
new file mode 100644
index 00000000..888a65f1
--- /dev/null
+++ b/misc/vulns/check_vulns.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+echo "Checking for vulnerabilities in JAR"
+
+trivy rootfs "$(dirname "$0")/../../app/build/libs"
+
+#gradle dependencyInsight --dependency
diff --git a/script/run-data-caterer.sh b/script/run-data-caterer.sh
index b9c093be..e3640307 100644
--- a/script/run-data-caterer.sh
+++ b/script/run-data-caterer.sh
@@ -2,13 +2,20 @@
 DATA_CATERER_MASTER="${DATA_CATERER_MASTER:-local[*]}"
 DEPLOY_MODE="${DEPLOY_MODE:-client}"
 JAVA_OPTS="-Dlog4j.configurationFile=file:///opt/app/log4j2.properties -Djdk.module.illegalAccess=deny"
+JAVA_17_OPTS="--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED"
 DRIVER_MEMORY="${DRIVER_MEMORY:-2g}"
 EXECUTOR_MEMORY="${EXECUTOR_MEMORY:-2g}"
 ALL_OPTS="$ADDITIONAL_OPTS --conf \"spark.driver.extraJavaOptions=$JAVA_OPTS\" --conf \"spark.executor.extraJavaOptions=$JAVA_OPTS\""
 
-if [[ "$DEPLOY_MODE" -eq "standalone" ]] ; then
+if [[ "$DEPLOY_MODE" == "standalone" ]] ; then
   echo "Running Data Caterer as a standalone application"
-  java -cp "/opt/spark/jars/*:/opt/app/job.jar" io.github.datacatering.datacaterer.core.ui.DataCatererUI
+  CMD=(
+    java
+    "$JAVA_OPTS"
+    "$JAVA_17_OPTS"
+    -cp "/opt/spark/jars/*:/opt/app/job.jar"
+    io.github.datacatering.datacaterer.core.ui.DataCatererUI
+  )
 else
   echo "Running Data Caterer as a Spark job"
   CMD=(
@@ -21,7 +28,6 @@ else
     "$ALL_OPTS"
     file:///opt/app/job.jar
   )
-
-  eval "${CMD[@]}"
 fi
 
+eval "${CMD[@]}"