From 1d7f36c422a2d35c08779ae1e618bcebfd503856 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 12:12:29 -0800 Subject: [PATCH 01/22] Use an nginx server for remote jars tests. --- .gitignore | 1 + e2e/runner.sh | 11 ++ .../docker-file-server/.gitignore | 1 + .../docker-file-server/Dockerfile | 4 + .../docker-file-server/nginx.conf | 34 +++++ integration-test/pom.xml | 2 +- .../k8s/integrationtest/KubernetesSuite.scala | 16 ++- .../SparkExamplesFileServerRunner.scala | 122 ++++++++++++++++++ 8 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 integration-test/docker-file-server/.gitignore create mode 100644 integration-test/docker-file-server/Dockerfile create mode 100644 integration-test/docker-file-server/nginx.conf create mode 100644 integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala diff --git a/.gitignore b/.gitignore index dc020f2..40f73a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .idea/ spark/ +spark integration-test/target/ *.class *.log diff --git a/e2e/runner.sh b/e2e/runner.sh index aded856..44a50f8 100755 --- a/e2e/runner.sh +++ b/e2e/runner.sh @@ -90,21 +90,31 @@ git checkout -B $BRANCH origin/$BRANCH TAG=$(git rev-parse HEAD | cut -c -6) echo "Spark distribution built at SHA $TAG" +FILE_SERVER_IMAGE="$IMAGE_REPO/spark-examples-file-server:$TAG" +FILE_SERVER_BUILD_DIR="$TEST_ROOT/integration-test/docker-file-server" +rm -rf $FILE_SERVER_BUILD_DIR/jars +mkdir -p $FILE_SERVER_BUILD_DIR/jars +cp $SPARK_REPO_ROOT/dist/examples/jars/spark-examples*.jar $FILE_SERVER_BUILD_DIR/jars/. cd $SPARK_REPO_ROOT/dist if [[ $DEPLOY_MODE == cloud ]] ; then + docker build -t $FILE_SERVER_IMAGE "$TEST_ROOT/integration-test/docker-file-server" ./sbin/build-push-docker-images.sh -r $IMAGE_REPO -t $TAG build if [[ $IMAGE_REPO == gcr.io* ]] ; then gcloud docker -- push $IMAGE_REPO/spark-driver:$TAG && \ gcloud docker -- push $IMAGE_REPO/spark-executor:$TAG && \ gcloud docker -- push $IMAGE_REPO/spark-init:$TAG + gcloud docker -- push $FILE_SERVER_IMAGE else ./sbin/build-push-docker-images.sh -r $IMAGE_REPO -t $TAG push + docker push $FILE_SERVER_IMAGE fi else # -m option for minikube. 
+ eval $(minikube docker-env) + docker build -t $FILE_SERVER_IMAGE "$TEST_ROOT/integration-test/docker-file-server" ./sbin/build-push-docker-images.sh -m -r $IMAGE_REPO -t $TAG build fi @@ -112,6 +122,7 @@ cd $TEST_ROOT/integration-test $SPARK_REPO_ROOT/build/mvn clean -Ddownload.plugin.skip=true integration-test \ -Dspark-distro-tgz=$SPARK_REPO_ROOT/*.tgz \ -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://$MASTER \ + -Dspark.docker.test.fileServerImage=$FILE_SERVER_IMAGE \ -Dspark.docker.test.driverImage=$IMAGE_REPO/spark-driver:$TAG \ -Dspark.docker.test.executorImage=$IMAGE_REPO/spark-executor:$TAG \ -Dspark.docker.test.initContainerImage=$IMAGE_REPO/spark-init:$TAG" || : diff --git a/integration-test/docker-file-server/.gitignore b/integration-test/docker-file-server/.gitignore new file mode 100644 index 0000000..2723de6 --- /dev/null +++ b/integration-test/docker-file-server/.gitignore @@ -0,0 +1 @@ +jars diff --git a/integration-test/docker-file-server/Dockerfile b/integration-test/docker-file-server/Dockerfile new file mode 100644 index 0000000..537748d --- /dev/null +++ b/integration-test/docker-file-server/Dockerfile @@ -0,0 +1,4 @@ +FROM nginx:alpine + +COPY jars /opt/spark/jars +COPY nginx.conf /etc/nginx/nginx.conf diff --git a/integration-test/docker-file-server/nginx.conf b/integration-test/docker-file-server/nginx.conf new file mode 100644 index 0000000..ce0a45f --- /dev/null +++ b/integration-test/docker-file-server/nginx.conf @@ -0,0 +1,34 @@ +user nginx; +worker_processes 1; + +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + server { + root /opt/spark/jars; + location /ping { + return 200 'pong'; + add_header Content-Type text/plain; + } + } + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 65; + + #gzip on; +} diff --git a/integration-test/pom.xml b/integration-test/pom.xml index 9375d91..8d15876 100644 --- a/integration-test/pom.xml +++ b/integration-test/pom.xml @@ -134,7 +134,7 @@ /bin/sh -c - rm -rf spark-distro; mkdir spark-distro-tmp; cd spark-distro-tmp; tar xfz ${spark-distro-tgz}; mv * ../spark-distro; cd ..; rm -rf spark-distro-tmp + rm -rf spark-distro; mkdir spark-distro-tmp; cd spark-distro-tmp; tar xfz ${spark-distro-tgz}; mv * ../spark-distro; cd ..; rm -rf spark-distro-tmp; rm -rf docker-file-server/jars; mkdir -p docker-file-server/jars; cp spark-distro/examples/jars/spark-examples*.jar docker-file-server/jars/. 
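For context: the file server added above is just an nginx pod that serves the spark-examples jar out of /opt/spark/jars and answers GET /ping with "pong"; the suite exposes it through a NodePort service and submits SparkPi with the resulting http:// URL as the application resource, so the init-container has to download the jar. A rough manual equivalent is sketched below; the node IP, node port, jar file name, and image names are illustrative rather than taken from the tests.

    # Health-check the file server (the suite reads the real node port from the Service spec).
    NODE_IP=$(minikube ip)
    NODE_PORT=31000   # illustrative; assigned by Kubernetes for the NodePort service
    curl "http://$NODE_IP:$NODE_PORT/ping"   # expect: pong

    # Submit SparkPi with the remote jar; the init-container fetches it before the driver starts.
    ./bin/spark-submit \
      --master "k8s://https://$NODE_IP:8443" \
      --deploy-mode cluster \
      --class org.apache.spark.examples.SparkPi \
      --conf spark.kubernetes.driver.container.image=docker.io/kubespark/spark-driver:latest \
      --conf spark.kubernetes.executor.container.image=docker.io/kubespark/spark-executor:latest \
      --conf spark.kubernetes.initContainer.image=docker.io/kubespark/spark-init:latest \
      "http://$NODE_IP:$NODE_PORT/spark-examples_2.11-2.3.0-SNAPSHOT.jar" 5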
diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 65b6d95..39bf0d3 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -17,12 +17,12 @@ package org.apache.spark.deploy.k8s.integrationtest import java.io.File +import java.net.URI import java.nio.file.Paths import java.util.UUID import java.util.regex.Pattern import scala.collection.JavaConverters._ - import com.google.common.io.PatternFilenameFilter import io.fabric8.kubernetes.api.model.{Container, Pod} import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} @@ -41,6 +41,7 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkAppConf: SparkAppConf = _ + private var remoteExamplesJarUri: URI = _ private val driverImage = System.getProperty( "spark.docker.test.driverImage", @@ -52,7 +53,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit "spark.docker.test.initContainerImage", "spark-init:latest") - override def beforeAll(): Unit = { testBackend.initialize() kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) @@ -72,6 +72,8 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit .set(INIT_CONTAINER_DOCKER_IMAGE, tagImage("spark-init")) .set("spark.kubernetes.executor.label.spark-app-locator", APP_LOCATOR_LABEL) kubernetesTestComponents.createNamespace() + remoteExamplesJarUri = SparkExamplesFileServerRunner + .launchServerAndGetUriForExamplesJar(kubernetesTestComponents) } after { @@ -102,6 +104,11 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit runSparkPiAndVerifyCompletion(appArgs = Array("5")) } + test("Run SparkPi using the remote example jar.") { + sparkAppConf.set("spark.kubernetes.initContainer.image", initContainerImage) + runSparkPiAndVerifyCompletion(appResource = remoteExamplesJarUri.toString) + } + test("Run SparkPi with custom driver pod name, labels, annotations, and environment variables.") { doMinikubeCheck sparkAppConf @@ -169,8 +176,8 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit createTestSecret() - runSparkPageRankAndVerifyCompletion( - appArgs = Array(CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE), + runSparkPiAndVerifyCompletion( + appResource = remoteExamplesJarUri.toString, driverPodChecker = (driverPod: Pod) => { doBasicDriverPodCheck(driverPod) checkTestSecret(driverPod, withInitContainer = true) @@ -194,7 +201,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit driverPodChecker, executorPodChecker) } - private def runSparkPageRankAndVerifyCompletion( appResource: String = CONTAINER_LOCAL_SPARK_DISTRO_EXAMPLES_JAR, driverPodChecker: Pod => Unit = doBasicDriverPodCheck, diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala b/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala new file mode 100644 index 0000000..fa96b1f --- /dev/null +++ 
b/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest + +import java.net.{URI, URL} +import java.nio.file.Paths +import java.util.UUID + +import io.fabric8.kubernetes.api.model.{Endpoints, Pod, Service} +import org.apache.http.client.utils.URIBuilder + +private[spark] object SparkExamplesFileServerRunner { + + private val fileServerImage = System.getProperty( + "spark.docker.test.fileServerImage", "spark-examples-file-server:latest") + private val fileServerExampleJarsDir = Paths.get("docker-file-server", "jars") + require( + fileServerExampleJarsDir + .toFile + .listFiles() + .exists(file => file.getName.startsWith("spark-examples")), + s"No spark-examples jar found in $fileServerExampleJarsDir.") + require( + fileServerExampleJarsDir + .toFile + .listFiles() + .count(file => file.getName.startsWith("spark-examples")) == 1, + s"Multiple spark-examples jars found in $fileServerExampleJarsDir.") + private val fileServerExampleJar = Paths.get("docker-file-server", "jars") + .toFile + .listFiles() + .filter(file => file.getName.startsWith("spark-examples"))(0) + .getName + private val fileServerPodLocatorLabelKey = "fileServerLocator" + private val fileServerPodLocatorLabelValue = UUID.randomUUID().toString.replaceAll("-", "") + private val fileServerName = "spark-examples-file-server" + + def launchServerAndGetUriForExamplesJar( + kubernetesTestComponents: KubernetesTestComponents): URI = { + val podReadinessWatcher = new SparkReadinessWatcher[Pod] + Utils.tryWithResource( + kubernetesTestComponents + .kubernetesClient + .pods() + .withName(fileServerName) + .watch(podReadinessWatcher)) { _ => + kubernetesTestComponents.kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(fileServerName) + .addToLabels(fileServerPodLocatorLabelKey, fileServerPodLocatorLabelValue) + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName("main") + .withImage(fileServerImage) + .withImagePullPolicy("Never") + .withNewReadinessProbe() + .withNewHttpGet() + .withNewPort(80) + .withPath("/ping") + .endHttpGet() + .endReadinessProbe() + .endContainer() + .endSpec() + .done() + podReadinessWatcher.waitUntilReady() + } + val endpointsReadinessWatcher = new SparkReadinessWatcher[Endpoints] + Utils.tryWithResource( + kubernetesTestComponents + .kubernetesClient + .endpoints() + .withName(fileServerName) + .watch(endpointsReadinessWatcher)) { _ => + kubernetesTestComponents.kubernetesClient.services().createNew() + .withNewMetadata() + .withName(fileServerName) + .endMetadata() + .withNewSpec() + .addToSelector(fileServerPodLocatorLabelKey, 
fileServerPodLocatorLabelValue) + .addNewPort() + .withName("file-server-port") + .withNewTargetPort(80) + .withPort(80) + .endPort() + .withType("NodePort") + .endSpec() + .done() + endpointsReadinessWatcher.waitUntilReady() + } + val resolvedNodePort = kubernetesTestComponents + .kubernetesClient + .services() + .withName(fileServerName) + .get() + .getSpec + .getPorts + .get(0) + .getNodePort + val masterHostname = URI.create(kubernetesTestComponents.clientConfig.getMasterUrl).getHost + new URIBuilder() + .setHost(masterHostname) + .setPort(resolvedNodePort) + .setScheme("http") + .setPath(s"/$fileServerExampleJar") + .build() + } +} From 8c067af0ec017166b9d320f1bb87d0679caacd89 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 12:57:04 -0800 Subject: [PATCH 02/22] Moves all integration test setup logic to Maven and scripts. The Kubernetes integration tests now always expect an image to be pre-built, so we no longer build images with Scala code. Maven's pre-integration-test invokes a single script to bootstrap the environment with the built images, etc. In the transition we try to keep as much of the same semantics as possible. --- .gitignore | 9 +- build/mvn | 158 +++++++++++++++ .../.gitignore | 0 .../Dockerfile | 0 .../nginx.conf | 0 e2e/e2e-prow.sh | 39 ---- e2e/runner.sh | 130 ------------ .../docker/KubernetesSuiteDockerManager.scala | 189 ------------------ integration-test/pom.xml => pom.xml | 51 ++++- scripts/build-spark.sh | 33 +++ e2e/e2e-minikube.sh => scripts/clone-spark.sh | 30 ++- scripts/parse-arguments.sh | 82 ++++++++ scripts/prepare-docker-images.sh | 63 ++++++ scripts/setup-integration-test-env.sh | 34 ++++ scripts/write-docker-tag.sh | 30 +++ .../test/resources/log4j.properties | 0 .../k8s/integrationtest/KubernetesSuite.scala | 62 +++--- .../KubernetesTestComponents.scala | 17 +- .../deploy/k8s/integrationtest/Logging.scala | 0 .../k8s/integrationtest/ProcessUtils.scala | 0 .../SparkExamplesFileServerRunner.scala | 7 +- .../SparkReadinessWatcher.scala | 0 .../deploy/k8s/integrationtest/Utils.scala | 0 .../backend/GCE/GCETestBackend.scala | 4 - .../backend/IntegrationTestBackend.scala | 1 - .../backend/minikube/Minikube.scala | 12 -- .../minikube/MinikubeTestBackend.scala | 16 +- .../deploy/k8s/integrationtest/config.scala | 19 ++ .../k8s/integrationtest/constants.scala | 3 - 29 files changed, 527 insertions(+), 462 deletions(-) create mode 100755 build/mvn rename {integration-test/docker-file-server => docker-file-server}/.gitignore (100%) rename {integration-test/docker-file-server => docker-file-server}/Dockerfile (100%) rename {integration-test/docker-file-server => docker-file-server}/nginx.conf (100%) delete mode 100755 e2e/e2e-prow.sh delete mode 100755 e2e/runner.sh delete mode 100644 integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/docker/KubernetesSuiteDockerManager.scala rename integration-test/pom.xml => pom.xml (70%) create mode 100755 scripts/build-spark.sh rename e2e/e2e-minikube.sh => scripts/clone-spark.sh (54%) create mode 100755 scripts/parse-arguments.sh create mode 100755 scripts/prepare-docker-images.sh create mode 100755 scripts/setup-integration-test-env.sh create mode 100755 scripts/write-docker-tag.sh rename {integration-test/src => src}/test/resources/log4j.properties (100%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala (86%) rename {integration-test/src => 
src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala (89%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/Logging.scala (100%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala (100%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala (95%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkReadinessWatcher.scala (100%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala (100%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala (92%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala (97%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala (89%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala (66%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala (62%) rename {integration-test/src => src}/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala (91%) diff --git a/.gitignore b/.gitignore index 40f73a7..aed5cf2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,12 @@ .idea/ spark/ -spark -integration-test/target/ +spark-dist +target/ +build/*.jar +build/apache-maven* +build/scala* +build/zinc* *.class *.log *.iml +*.swp diff --git a/build/mvn b/build/mvn new file mode 100755 index 0000000..c6051ec --- /dev/null +++ b/build/mvn @@ -0,0 +1,158 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Determine the current working directory +_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +# Preserve the calling directory +_CALLING_DIR="$(pwd)" +# Options used during compilation +_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" + +# Installs any application tarball given a URL, the expected tarball name, +# and, optionally, a checkable binary path to determine if the binary has +# already been installed +## Arg1 - URL +## Arg2 - Tarball Name +## Arg3 - Checkable Binary +install_app() { + local remote_tarball="$1/$2" + local local_tarball="${_DIR}/$2" + local binary="${_DIR}/$3" + + # setup `curl` and `wget` silent options if we're running on Jenkins + local curl_opts="-L" + local wget_opts="" + if [ -n "$AMPLAB_JENKINS" ]; then + curl_opts="-s ${curl_opts}" + wget_opts="--quiet ${wget_opts}" + else + curl_opts="--progress-bar ${curl_opts}" + wget_opts="--progress=bar:force ${wget_opts}" + fi + + if [ -z "$3" -o ! -f "$binary" ]; then + # check if we already have the tarball + # check if we have curl installed + # download application + [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \ + echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \ + curl ${curl_opts} "${remote_tarball}" > "${local_tarball}" + # if the file still doesn't exist, lets try `wget` and cross our fingers + [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \ + echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \ + wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" + # if both were unsuccessful, exit + [ ! -f "${local_tarball}" ] && \ + echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ + echo "please install manually and try again." && \ + exit 2 + cd "${_DIR}" && tar -xzf "$2" + rm -rf "$local_tarball" + fi +} + +# Determine the Maven version from the root pom.xml file and +# install maven under the build/ folder if needed. +install_mvn() { + local MVN_VERSION=`grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` + echo $MVN_VERSION + MVN_BIN="$(command -v mvn)" + if [ "$MVN_BIN" ]; then + local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')" + fi + # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers + function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } + if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then + local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='} + + install_app \ + "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \ + "apache-maven-${MVN_VERSION}-bin.tar.gz" \ + "apache-maven-${MVN_VERSION}/bin/mvn" + + MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn" + fi +} + +# Install zinc under the build/ folder +install_zinc() { + local zinc_path="zinc-0.3.15/bin/zinc" + [ ! 
-f "${_DIR}/${zinc_path}" ] && ZINC_INSTALL_FLAG=1 + local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.typesafe.com} + + install_app \ + "${TYPESAFE_MIRROR}/zinc/0.3.15" \ + "zinc-0.3.15.tgz" \ + "${zinc_path}" + ZINC_BIN="${_DIR}/${zinc_path}" +} + +# Determine the Scala version from the root pom.xml file, set the Scala URL, +# and, with that, download the specific version of Scala necessary under +# the build/ folder +install_scala() { + # determine the Scala version used in Spark + local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` + local scala_bin="${_DIR}/scala-${scala_version}/bin/scala" + local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.typesafe.com} + + install_app \ + "${TYPESAFE_MIRROR}/scala/${scala_version}" \ + "scala-${scala_version}.tgz" \ + "scala-${scala_version}/bin/scala" + + SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar" + SCALA_LIBRARY="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-library.jar" +} + +# Setup healthy defaults for the Zinc port if none were provided from +# the environment +ZINC_PORT=${ZINC_PORT:-"3030"} + +# Remove `--force` for backward compatibility. +if [ "$1" == "--force" ]; then + echo "WARNING: '--force' is deprecated and ignored." + shift +fi + +# Install the proper version of Scala, Zinc and Maven for the build +install_zinc +install_scala +install_mvn + +# Reset the current working directory +cd "${_CALLING_DIR}" + +# Now that zinc is ensured to be installed, check its status and, if its +# not running or just installed, start it +if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`"${ZINC_BIN}" -status -port ${ZINC_PORT}`" ]; then + export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"} + "${ZINC_BIN}" -shutdown -port ${ZINC_PORT} + "${ZINC_BIN}" -start -port ${ZINC_PORT} \ + -scala-compiler "${SCALA_COMPILER}" \ + -scala-library "${SCALA_LIBRARY}" &>/dev/null +fi + +# Set any `mvn` options if not already present +export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"} + +echo "Using \`mvn\` from path: $MVN_BIN" 1>&2 + +# Last, call the `mvn` command as usual +${MVN_BIN} -DzincPort=${ZINC_PORT} "$@" diff --git a/integration-test/docker-file-server/.gitignore b/docker-file-server/.gitignore similarity index 100% rename from integration-test/docker-file-server/.gitignore rename to docker-file-server/.gitignore diff --git a/integration-test/docker-file-server/Dockerfile b/docker-file-server/Dockerfile similarity index 100% rename from integration-test/docker-file-server/Dockerfile rename to docker-file-server/Dockerfile diff --git a/integration-test/docker-file-server/nginx.conf b/docker-file-server/nginx.conf similarity index 100% rename from integration-test/docker-file-server/nginx.conf rename to docker-file-server/nginx.conf diff --git a/e2e/e2e-prow.sh b/e2e/e2e-prow.sh deleted file mode 100755 index 45a8c2b..0000000 --- a/e2e/e2e-prow.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### This script is used by Kubernetes Test Infrastructure to run integration tests. -### See documenation at https://github.com/kubernetes/test-infra/tree/master/prow -### To run the integration tests yourself, use e2e/runner.sh. - -set -ex - -# Install basic dependencies -echo "deb http://http.debian.net/debian jessie-backports main" >> /etc/apt/sources.list -apt-get update && apt-get install -y curl wget git tar -apt-get install -t jessie-backports -y openjdk-8-jdk - -# Set up config. -master=$(kubectl cluster-info | head -n 1 | grep -oE "https?://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}(:[0-9]+)?") -repo="https://github.com/apache/spark" - -# Special GCP project for publishing docker images built by test. -image_repo="gcr.io/spark-testing-191023" -cd "$(dirname "$0")"/../ -./e2e/runner.sh -m $master -r $repo -i $image_repo -d cloud - -# Copy out the junit xml files for consumption by k8s test-infra. -ls -1 ./integration-test/target/surefire-reports/*.xml | cat -n | while read n f; do cp "$f" "/workspace/_artifacts/junit_0$n.xml"; done diff --git a/e2e/runner.sh b/e2e/runner.sh deleted file mode 100755 index 44a50f8..0000000 --- a/e2e/runner.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -usage () { - echo "Usage:" - echo " ./e2e/runner.sh -h Display this help message." - echo " ./e2e/runner.sh -m -r -b -i -d [minikube|cloud]" - echo " note that you must have kubectl configured to access the specified" - echo " . Also you must have access to the . " - echo " The deployment mode can be specified using the 'd' flag." -} - -cd "$(dirname "$0")" - -### Set sensible defaults ### -REPO="https://github.com/apache/spark" -IMAGE_REPO="docker.io/kubespark" -DEPLOY_MODE="minikube" -BRANCH="master" - -### Parse options ### -while getopts h:m:r:i:d:b: option -do - case "${option}" - in - h) - usage - exit 0 - ;; - m) MASTER=${OPTARG};; - r) REPO=${OPTARG};; - b) BRANCH=${OPTARG};; - i) IMAGE_REPO=${OPTARG};; - d) DEPLOY_MODE=${OPTARG};; - \? ) - echo "Invalid Option: -$OPTARG" 1>&2 - exit 1 - ;; - esac -done - -### Ensure cluster is set. -if [ -z "$MASTER" ] -then - echo "Missing master-url (-m) argument." - echo "" - usage - exit -fi - -### Ensure deployment mode is minikube/cloud. 
-if [[ $DEPLOY_MODE != minikube && $DEPLOY_MODE != cloud ]]; -then - echo "Invalid deployment mode $DEPLOY_MODE" - usage - exit 1 -fi - -echo "Running tests on cluster $MASTER against $REPO." -echo "Spark images will be created in $IMAGE_REPO" - -set -ex -TEST_ROOT=$(git rev-parse --show-toplevel) -SPARK_REPO_ROOT="$TEST_ROOT/spark" -# clone spark distribution if needed. -if [ -d "$SPARK_REPO_ROOT" ]; -then - (cd $SPARK_REPO_ROOT && git pull origin $BRANCH); -else - git clone $REPO $SPARK_REPO_ROOT -fi - -cd $SPARK_REPO_ROOT -git checkout -B $BRANCH origin/$BRANCH -./dev/make-distribution.sh --tgz -Phadoop-2.7 -Pkubernetes -DskipTests -TAG=$(git rev-parse HEAD | cut -c -6) -echo "Spark distribution built at SHA $TAG" - -FILE_SERVER_IMAGE="$IMAGE_REPO/spark-examples-file-server:$TAG" -FILE_SERVER_BUILD_DIR="$TEST_ROOT/integration-test/docker-file-server" -rm -rf $FILE_SERVER_BUILD_DIR/jars -mkdir -p $FILE_SERVER_BUILD_DIR/jars -cp $SPARK_REPO_ROOT/dist/examples/jars/spark-examples*.jar $FILE_SERVER_BUILD_DIR/jars/. -cd $SPARK_REPO_ROOT/dist - -if [[ $DEPLOY_MODE == cloud ]] ; -then - docker build -t $FILE_SERVER_IMAGE "$TEST_ROOT/integration-test/docker-file-server" - ./sbin/build-push-docker-images.sh -r $IMAGE_REPO -t $TAG build - if [[ $IMAGE_REPO == gcr.io* ]] ; - then - gcloud docker -- push $IMAGE_REPO/spark-driver:$TAG && \ - gcloud docker -- push $IMAGE_REPO/spark-executor:$TAG && \ - gcloud docker -- push $IMAGE_REPO/spark-init:$TAG - gcloud docker -- push $FILE_SERVER_IMAGE - else - ./sbin/build-push-docker-images.sh -r $IMAGE_REPO -t $TAG push - docker push $FILE_SERVER_IMAGE - fi -else - # -m option for minikube. - eval $(minikube docker-env) - docker build -t $FILE_SERVER_IMAGE "$TEST_ROOT/integration-test/docker-file-server" - ./sbin/build-push-docker-images.sh -m -r $IMAGE_REPO -t $TAG build -fi - -cd $TEST_ROOT/integration-test -$SPARK_REPO_ROOT/build/mvn clean -Ddownload.plugin.skip=true integration-test \ - -Dspark-distro-tgz=$SPARK_REPO_ROOT/*.tgz \ - -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://$MASTER \ - -Dspark.docker.test.fileServerImage=$FILE_SERVER_IMAGE \ - -Dspark.docker.test.driverImage=$IMAGE_REPO/spark-driver:$TAG \ - -Dspark.docker.test.executorImage=$IMAGE_REPO/spark-executor:$TAG \ - -Dspark.docker.test.initContainerImage=$IMAGE_REPO/spark-init:$TAG" || : - -echo "TEST SUITE FINISHED" diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/docker/KubernetesSuiteDockerManager.scala b/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/docker/KubernetesSuiteDockerManager.scala deleted file mode 100644 index 0163d33..0000000 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/docker/KubernetesSuiteDockerManager.scala +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.k8s.integrationtest.docker - -import java.io.{File, PrintWriter} -import java.net.URI -import java.nio.file.Paths -import java.util.UUID - -import com.google.common.base.Charsets -import com.google.common.io.Files -import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates, LoggingBuildHandler} -import com.spotify.docker.client.DockerClient.{ListContainersParam, ListImagesParam, RemoveContainerParam} -import com.spotify.docker.client.messages.Container -import org.apache.http.client.utils.URIBuilder -import org.scalatest.concurrent.{Eventually, PatienceConfiguration} -import org.scalatest.time.{Minutes, Seconds, Span} -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.k8s.integrationtest.constants._ -import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite -import org.apache.spark.deploy.k8s.integrationtest.Logging -import org.apache.spark.deploy.k8s.integrationtest.Utils.tryWithResource - -private[spark] class KubernetesSuiteDockerManager( - dockerEnv: Map[String, String], userProvidedDockerImageTag: Option[String]) extends Logging { - - private val DOCKER_BUILD_PATH = SPARK_DISTRO_PATH - // Dockerfile paths must be relative to the build path. - private val DOCKERFILES_DIR = "kubernetes/dockerfiles/" - private val BASE_DOCKER_FILE = DOCKERFILES_DIR + "spark-base/Dockerfile" - private val DRIVER_DOCKER_FILE = DOCKERFILES_DIR + "driver/Dockerfile" - private val EXECUTOR_DOCKER_FILE = DOCKERFILES_DIR + "executor/Dockerfile" - private val INIT_CONTAINER_DOCKER_FILE = DOCKERFILES_DIR + "init-container/Dockerfile" - private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) - private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - - private val resolvedDockerImageTag = - userProvidedDockerImageTag.getOrElse(UUID.randomUUID().toString.replaceAll("-", "")) - private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", - throw new IllegalStateException("DOCKER_HOST env not found.")) - private val originalDockerUri = URI.create(dockerHost) - private val httpsDockerUri = new URIBuilder() - .setHost(originalDockerUri.getHost) - .setPort(originalDockerUri.getPort) - .setScheme("https") - .build() - - private val dockerCerts = dockerEnv.getOrElse("DOCKER_CERT_PATH", - throw new IllegalStateException("DOCKER_CERT_PATH env not found.")) - - private val dockerClient = new DefaultDockerClient.Builder() - .uri(httpsDockerUri) - .dockerCertificates(DockerCertificates - .builder() - .dockerCertPath(Paths.get(dockerCerts)) - .build().get()) - .build() - - def buildSparkDockerImages(): Unit = { - if (userProvidedDockerImageTag.isEmpty) { - Eventually.eventually(TIMEOUT, INTERVAL) { - dockerClient.ping() - } - buildImage("spark-base", BASE_DOCKER_FILE) - buildImage("spark-driver", DRIVER_DOCKER_FILE) - buildImage("spark-executor", EXECUTOR_DOCKER_FILE) - buildImage("spark-init", INIT_CONTAINER_DOCKER_FILE) - } - } - - def deleteImages(): Unit = { - if (userProvidedDockerImageTag.isEmpty) { - removeRunningContainers() - deleteImage("spark-base") - deleteImage("spark-driver") - deleteImage("spark-executor") - deleteImage("spark-init") - } - } - - def dockerImageTag(): String = resolvedDockerImageTag - - private def buildImage(name: String, dockerFile: String): Unit = { - logInfo(s"Building Docker image - $name:$resolvedDockerImageTag") - val dockerFileWithBaseTag = new 
File(DOCKER_BUILD_PATH.resolve( - s"$dockerFile-$resolvedDockerImageTag").toAbsolutePath.toString) - dockerFileWithBaseTag.deleteOnExit() - try { - val originalDockerFileText = Files.readLines( - DOCKER_BUILD_PATH.resolve(dockerFile).toFile, Charsets.UTF_8).asScala - val dockerFileTextWithProperBaseImage = originalDockerFileText.map( - _.replace("FROM spark-base", s"FROM spark-base:$resolvedDockerImageTag")) - tryWithResource(Files.newWriter(dockerFileWithBaseTag, Charsets.UTF_8)) { fileWriter => - tryWithResource(new PrintWriter(fileWriter)) { printWriter => - for (line <- dockerFileTextWithProperBaseImage) { - // scalastyle:off println - printWriter.println(line) - // scalastyle:on println - } - } - } - dockerClient.build( - DOCKER_BUILD_PATH, - s"$name:$resolvedDockerImageTag", - s"$dockerFile-$resolvedDockerImageTag", - new LoggingBuildHandler()) - } finally { - dockerFileWithBaseTag.delete() - } - } - - /** - * Forces all containers running an image with the configured tag to halt and be removed. - */ - private def removeRunningContainers(): Unit = { - val imageIds = dockerClient.listImages(ListImagesParam.allImages()) - .asScala - .filter(image => image.repoTags().asScala.exists(_.endsWith(s":$resolvedDockerImageTag"))) - .map(_.id()) - .toSet - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val runningContainersWithImageTag = stopRunningContainers(imageIds) - require( - runningContainersWithImageTag.isEmpty, - s"${runningContainersWithImageTag.size} containers found still running" + - s" with the image tag $resolvedDockerImageTag") - } - dockerClient.listContainers(ListContainersParam.allContainers()) - .asScala - .filter(container => imageIds.contains(container.imageId())) - .foreach(container => dockerClient.removeContainer( - container.id(), RemoveContainerParam.forceKill(true))) - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val containersWithImageTag = dockerClient.listContainers(ListContainersParam.allContainers()) - .asScala - .filter(container => imageIds.contains(container.imageId())) - require(containersWithImageTag.isEmpty, s"${containersWithImageTag.size} containers still" + - s" found with image tag $resolvedDockerImageTag.") - } - - } - - private def stopRunningContainers(imageIds: Set[String]): Iterable[Container] = { - val runningContainersWithImageTag = getRunningContainersWithImageIds(imageIds) - if (runningContainersWithImageTag.nonEmpty) { - logInfo(s"Found ${runningContainersWithImageTag.size} containers running with" + - s" an image with the tag $resolvedDockerImageTag. Attempting to remove these containers," + - s" and then will stall for 2 seconds.") - runningContainersWithImageTag.foreach { container => - dockerClient.stopContainer(container.id(), 5) - } - } - runningContainersWithImageTag - } - - private def getRunningContainersWithImageIds(imageIds: Set[String]): Iterable[Container] = { - dockerClient - .listContainers( - ListContainersParam.allContainers(), - ListContainersParam.withStatusRunning()) - .asScala - .filter(container => imageIds.contains(container.imageId())) - } - - private def deleteImage(name: String): Unit = { - try { - dockerClient.removeImage(s"$name:$resolvedDockerImageTag") - } catch { - case e: RuntimeException => - logWarning(s"Failed to delete image $name:$resolvedDockerImageTag. 
There may be images leaking in the" + - s" docker environment which are now stale and unused.", e) - } - } -} diff --git a/integration-test/pom.xml b/pom.xml similarity index 70% rename from integration-test/pom.xml rename to pom.xml index 8d15876..2fae459 100644 --- a/integration-test/pom.xml +++ b/pom.xml @@ -22,6 +22,7 @@ spark-kubernetes-integration-tests 0.1-SNAPSHOT + 3.3.9 3.5 1.1.1 5.0.2 @@ -39,7 +40,16 @@ 1.0 1.7.24 kubernetes-integration-tests - YOUR-SPARK-DISTRO-TARBALL-HERE + N/A + ${project.build.directory}/spark + ${project.build.directory}/spark-dist-unpacked + master + N/A + ${project.build.directory}/imageTag.txt + minikube + docker.io/kubespark + https://github.com/apache/spark + false jar @@ -124,17 +134,43 @@ ${exec-maven-plugin.version} - unpack-spark-distro + setup-integration-test-env pre-integration-test exec - ${project.build.directory} - /bin/sh + scripts/setup-integration-test-env.sh - -c - rm -rf spark-distro; mkdir spark-distro-tmp; cd spark-distro-tmp; tar xfz ${spark-distro-tgz}; mv * ../spark-distro; cd ..; rm -rf spark-distro-tmp; rm -rf docker-file-server/jars; mkdir -p docker-file-server/jars; cp spark-distro/examples/jars/spark-examples*.jar docker-file-server/jars/. + --spark-branch + ${spark.kubernetes.test.sparkBranch} + + --spark-repo + ${spark.kubernetes.test.sparkRepo} + + --spark-repo-local-dir + ${spark.kubernetes.test.sparkRepo.localDir} + + --unpacked-spark-tgz + ${spark.kubernetes.test.unpackSparkDir} + + --image-repo + ${spark.kubernetes.test.imageRepo} + + --image-tag + ${spark.kubernetes.test.imageTag} + + --image-tag-output-file + ${spark.kubernetes.test.imageTagFile} + + --deploy-mode + ${spark.kubernetes.test.deployMode} + + --spark-tgz + ${spark.kubernetes.test.sparkTgz} + + --skip-building-images + ${spark.kubernetes.test.skipBuildingImages} @@ -155,6 +191,9 @@ file:src/test/resources/log4j.properties true + ${spark.kubernetes.test.imageTagFile} + ${spark.kubernetes.test.unpackSparkDir} + ${spark.kubernetes.test.imageRepo} ${test.exclude.tags} diff --git a/scripts/build-spark.sh b/scripts/build-spark.sh new file mode 100755 index 0000000..9cf733b --- /dev/null +++ b/scripts/build-spark.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -ex +SCRIPTS_DIR=$(dirname $0) +source "$SCRIPTS_DIR/parse-arguments.sh" "$@" + +cd $SPARK_REPO_LOCAL_DIR +rm -rf $UNPACKED_SPARK_TGZ +mkdir -p $UNPACKED_SPARK_TGZ +if [[ $SPARK_TGZ == "N/A" ]]; +then + ./dev/make-distribution.sh --tgz -Phadoop-2.7 -Pkubernetes -DskipTests; + tar -xzvf $SPARK_REPO_LOCAL_DIR/spark-*.tgz --strip-components=1 -C $UNPACKED_SPARK_TGZ; +else + tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ +fi; diff --git a/e2e/e2e-minikube.sh b/scripts/clone-spark.sh similarity index 54% rename from e2e/e2e-minikube.sh rename to scripts/clone-spark.sh index 3d2aef2..eb0c7c9 100755 --- a/e2e/e2e-minikube.sh +++ b/scripts/clone-spark.sh @@ -1,5 +1,6 @@ -#!/bin/bash +#!/usr/bin/env bash +# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -14,23 +15,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -### This script can be used to run integration tests locally on minikube. -### Requirements: minikube v0.23+ with the DNS addon enabled, and kubectl configured to point to it. +# set -ex - -### Basic Validation ### -if [ ! -d "integration-test" ]; then - echo "This script must be invoked from the top-level directory of the integration-tests repository" - usage - exit 1 +SCRIPTS_DIR=$(dirname $0) +source "$SCRIPTS_DIR/parse-arguments.sh" "$@" +# clone spark distribution if needed. +if [ -d "$SPARK_REPO_LOCAL_DIR" ]; +then + (cd $SPARK_REPO_LOCAL_DIR && git fetch origin $BRANCH); +else + git clone -b $BRANCH --single-branch $SPARK_REPO $SPARK_REPO_LOCAL_DIR; fi -# Set up config. -master=$(kubectl cluster-info | head -n 1 | grep -oE "https?://[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}(:[0-9]+)?") -repo="https://github.com/apache/spark" -image_repo=test - -# Run tests in minikube mode. -./e2e/runner.sh -m $master -r $repo -i $image_repo -d minikube +cd $SPARK_REPO_LOCAL_DIR +git checkout -B $BRANCH origin/$BRANCH diff --git a/scripts/parse-arguments.sh b/scripts/parse-arguments.sh new file mode 100755 index 0000000..91d9c53 --- /dev/null +++ b/scripts/parse-arguments.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -ex +TEST_ROOT_DIR=$(git rev-parse --show-toplevel) +BRANCH="master" +SPARK_REPO="https://github.com/apache/spark" +SPARK_REPO_LOCAL_DIR="$TEST_ROOT_DIR/target/spark" +UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" +IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" +DEPLOY_MODE=minikube +IMAGE_REPO="docker.io/kubespark" +SKIP_BUILDING_IMAGES=false +SPARK_TGZ="N/A" +IMAGE_TAG="N/A" + +# Parse arguments +while (( "$#" )); do + case $1 in + --spark-branch) + BRANCH="$2" + shift + ;; + --spark-repo) + SPARK_REPO="$2" + shift + ;; + --spark-repo-local-dir) + SPARK_REPO_LOCAL_DIR="$2" + shift + ;; + --unpacked-spark-tgz) + UNPACKED_SPARK_TGZ="$2" + shift + ;; + --image-repo) + IMAGE_REPO="$2" + shift + ;; + --image-tag) + IMAGE_TAG="$2" + shift + ;; + --image-tag-output-file) + IMAGE_TAG_OUTPUT_FILE="$2" + shift + ;; + --deploy-mode) + DEPLOY_MODE="$2" + shift + ;; + --spark-tgz) + SPARK_TGZ="$2" + shift + ;; + --skip-building-images) + SKIP_BUILDING_IMAGES="$2" + shift + ;; + *) + break + ;; + esac + shift +done + diff --git a/scripts/prepare-docker-images.sh b/scripts/prepare-docker-images.sh new file mode 100755 index 0000000..36b3d75 --- /dev/null +++ b/scripts/prepare-docker-images.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -ex +SCRIPTS_DIR=$(dirname $0) +source "$SCRIPTS_DIR/parse-arguments.sh" "$@" + +if [[ $IMAGE_TAG == "N/A" ]]; +then + IMAGE_TAG=$(uuidgen); +fi + +$SCRIPTS_DIR/write-docker-tag.sh --image-tag $IMAGE_TAG --image-tag-output-file $IMAGE_TAG_OUTPUT_FILE + +rm -f $IMAGE_TAG_OUTPUT_FILE +touch $IMAGE_TAG_OUTPUT_FILE +echo -n $IMAGE_TAG > $IMAGE_TAG_OUTPUT_FILE + +if [ ! -d "$UNPACKED_SPARK_TGZ" ]; +then + echo "No unpacked distribution was found at $UNPACKED_SPARK_TGZ. Please run clone-spark.sh and build-spark.sh first." && exit 1; +fi + +FILE_SERVER_IMAGE="$IMAGE_REPO/spark-examples-file-server:$IMAGE_TAG" +FILE_SERVER_BUILD_DIR="$TEST_ROOT_DIR/docker-file-server" +rm -rf $FILE_SERVER_BUILD_DIR/jars +mkdir -p $FILE_SERVER_BUILD_DIR/jars +cp $UNPACKED_SPARK_TGZ/examples/jars/spark-examples*.jar $FILE_SERVER_BUILD_DIR/jars/. +cd $UNPACKED_SPARK_TGZ +if [[ $DEPLOY_MODE == cloud ]] ; +then + docker build -t $FILE_SERVER_IMAGE "$FILE_SERVER_BUILD_DIR" + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $TAG build + if [[ $IMAGE_REPO == gcr.io* ]] ; + then + gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG && \ + gcloud docker -- push $FILE_SERVER_IMAGE + else + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push + docker push $FILE_SERVER_IMAGE + fi +else + # -m option for minikube. 
+ eval $(minikube docker-env) + docker build -t $FILE_SERVER_IMAGE $FILE_SERVER_BUILD_DIR + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build +fi diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh new file mode 100755 index 0000000..e327ac1 --- /dev/null +++ b/scripts/setup-integration-test-env.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCRIPTS_DIR=$(dirname $0) +source $SCRIPTS_DIR/parse-arguments.sh "$@" + +if [[ $SPARK_TGZ == "N/A" ]]; +then + $SCRIPTS_DIR/clone-spark.sh "$@"; +fi +$SCRIPTS_DIR/build-spark.sh "$@" + +if [[ $SKIP_BUILDING_IMAGES == false ]] ; +then + $SCRIPTS_DIR/prepare-docker-images.sh "$@"; +else + $SCRIPTS_DIR/write-docker-tag.sh "$@"; +fi diff --git a/scripts/write-docker-tag.sh b/scripts/write-docker-tag.sh new file mode 100755 index 0000000..f1d17c2 --- /dev/null +++ b/scripts/write-docker-tag.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +SCRIPTS_DIR=$(dirname $0) +source $SCRIPTS_DIR/parse-arguments.sh "$@" + +if [[ $IMAGE_TAG == "N/A" ]]; +then + echo "Image tag must be specified via --image-tag to write."; +fi + +rm -f $IMAGE_TAG_OUTPUT_FILE +touch $IMAGE_TAG_OUTPUT_FILE +echo -n $IMAGE_TAG > $IMAGE_TAG_OUTPUT_FILE diff --git a/integration-test/src/test/resources/log4j.properties b/src/test/resources/log4j.properties similarity index 100% rename from integration-test/src/test/resources/log4j.properties rename to src/test/resources/log4j.properties diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala similarity index 86% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 39bf0d3..4f9d7d1 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.integrationtest import java.io.File import java.net.URI -import java.nio.file.Paths +import java.nio.file.{Path, Paths} import java.util.UUID import java.util.regex.Pattern @@ -31,7 +31,6 @@ import org.scalatest.time.{Minutes, Seconds, Span} import org.apache.spark.deploy.k8s.integrationtest.backend.IntegrationTestBackendFactory import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.MinikubeTestBackend -import org.apache.spark.deploy.k8s.integrationtest.constants._ import org.apache.spark.deploy.k8s.integrationtest.config._ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfter { @@ -39,21 +38,28 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit import KubernetesSuite._ private val testBackend = IntegrationTestBackendFactory.getTestBackend() private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") + private var sparkHomeDir: Path = _ private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkAppConf: SparkAppConf = _ private var remoteExamplesJarUri: URI = _ - - private val driverImage = System.getProperty( - "spark.docker.test.driverImage", - "spark-driver:latest") - private val executorImage = System.getProperty( - "spark.docker.test.executorImage", - "spark-executor:latest") - private val initContainerImage = System.getProperty( - "spark.docker.test.initContainerImage", - "spark-init:latest") + private var image: String = _ + private var containerLocalSparkDistroExamplesJar: String = _ override def beforeAll(): Unit = { + val sparkDirProp = System.getProperty("spark.kubernetes.test.unpackSparkDir") + require(sparkDirProp != null, "Spark home directory must be provided in system properties.") + sparkHomeDir = Paths.get(sparkDirProp) + require(sparkHomeDir.toFile.isDirectory, + s"No directory found for spark home specified at $sparkHomeDir.") + val imageTag = getTestImageTag + val imageRepo = getTestImageRepo + image = s"$imageRepo/spark:$imageTag" + + val sparkDistroExamplesJarFile: File = sparkHomeDir.resolve(Paths.get("examples", "jars")) + .toFile + .listFiles(new PatternFilenameFilter(Pattern.compile("^spark-examples_.*\\.jar$")))(0) + containerLocalSparkDistroExamplesJar = s"local:///opt/spark/examples/jars/" + + s"${sparkDistroExamplesJarFile.getName}" 
testBackend.initialize() kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) } @@ -64,16 +70,15 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit before { sparkAppConf = kubernetesTestComponents.newSparkAppConf() - .set("spark.kubernetes.driver.container.image", driverImage) - .set("spark.kubernetes.executor.container.image", executorImage) + .set("spark.kubernetes.container.image", image) .set("spark.kubernetes.driver.label.spark-app-locator", APP_LOCATOR_LABEL) - .set(DRIVER_DOCKER_IMAGE, tagImage("spark-driver")) - .set(EXECUTOR_DOCKER_IMAGE, tagImage("spark-executor")) - .set(INIT_CONTAINER_DOCKER_IMAGE, tagImage("spark-init")) .set("spark.kubernetes.executor.label.spark-app-locator", APP_LOCATOR_LABEL) kubernetesTestComponents.createNamespace() remoteExamplesJarUri = SparkExamplesFileServerRunner - .launchServerAndGetUriForExamplesJar(kubernetesTestComponents) + .launchServerAndGetUriForExamplesJar( + kubernetesTestComponents, + getTestImageTag, + getTestImageRepo) } after { @@ -105,12 +110,10 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit } test("Run SparkPi using the remote example jar.") { - sparkAppConf.set("spark.kubernetes.initContainer.image", initContainerImage) runSparkPiAndVerifyCompletion(appResource = remoteExamplesJarUri.toString) } test("Run SparkPi with custom driver pod name, labels, annotations, and environment variables.") { - doMinikubeCheck sparkAppConf .set("spark.kubernetes.driver.pod.name", "spark-integration-spark-pi") .set("spark.kubernetes.driver.label.label1", "label1-value") @@ -159,7 +162,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit .set("spark.kubernetes.mountDependencies.filesDownloadDir", CONTAINER_LOCAL_FILE_DOWNLOAD_PATH) .set("spark.files", REMOTE_PAGE_RANK_DATA_FILE) - .set("spark.kubernetes.initContainer.image", initContainerImage) runSparkPageRankAndVerifyCompletion( appArgs = Array(CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE)) } @@ -172,7 +174,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit .set("spark.files", REMOTE_PAGE_RANK_DATA_FILE) .set(s"spark.kubernetes.driver.secrets.$TEST_SECRET_NAME", TEST_SECRET_MOUNT_PATH) .set(s"spark.kubernetes.executor.secrets.$TEST_SECRET_NAME", TEST_SECRET_MOUNT_PATH) - .set("spark.kubernetes.initContainer.image", initContainerImage) createTestSecret() @@ -189,7 +190,7 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit } private def runSparkPiAndVerifyCompletion( - appResource: String = CONTAINER_LOCAL_SPARK_DISTRO_EXAMPLES_JAR, + appResource: String = containerLocalSparkDistroExamplesJar, driverPodChecker: Pod => Unit = doBasicDriverPodCheck, executorPodChecker: Pod => Unit = doBasicExecutorPodCheck, appArgs: Array[String] = Array.empty[String]): Unit = { @@ -202,7 +203,7 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit executorPodChecker) } private def runSparkPageRankAndVerifyCompletion( - appResource: String = CONTAINER_LOCAL_SPARK_DISTRO_EXAMPLES_JAR, + appResource: String = containerLocalSparkDistroExamplesJar, driverPodChecker: Pod => Unit = doBasicDriverPodCheck, executorPodChecker: Pod => Unit = doBasicExecutorPodCheck, appArgs: Array[String]): Unit = { @@ -226,7 +227,7 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit mainAppResource = appResource, mainClass = mainClass, appArgs = appArgs) - 
SparkAppLauncher.launch(appArguments, sparkAppConf, TIMEOUT.value.toSeconds.toInt) + SparkAppLauncher.launch(appArguments, sparkAppConf, TIMEOUT.value.toSeconds.toInt, sparkHomeDir) val driverPod = kubernetesTestComponents.kubernetesClient .pods() @@ -257,15 +258,14 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit } } } - private def tagImage(image: String): String = s"$image:${testBackend.dockerImageTag()}" private def doBasicDriverPodCheck(driverPod: Pod): Unit = { - assert(driverPod.getSpec.getContainers.get(0).getImage === driverImage) + assert(driverPod.getSpec.getContainers.get(0).getImage === image) assert(driverPod.getSpec.getContainers.get(0).getName === "spark-kubernetes-driver") } private def doBasicExecutorPodCheck(executorPod: Pod): Unit = { - assert(executorPod.getSpec.getContainers.get(0).getImage === executorImage) + assert(executorPod.getSpec.getContainers.get(0).getImage === image) assert(executorPod.getSpec.getContainers.get(0).getName === "executor") } @@ -325,12 +325,6 @@ private[spark] object KubernetesSuite { val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - val SPARK_DISTRO_EXAMPLES_JAR_FILE: File = Paths.get(SPARK_DISTRO_PATH.toFile.getAbsolutePath, - "examples", "jars") - .toFile - .listFiles(new PatternFilenameFilter(Pattern.compile("^spark-examples_.*\\.jar$")))(0) - val CONTAINER_LOCAL_SPARK_DISTRO_EXAMPLES_JAR: String = s"local:///opt/spark/examples/jars/" + - s"${SPARK_DISTRO_EXAMPLES_JAR_FILE.getName}" val SPARK_PI_MAIN_CLASS: String = "org.apache.spark.examples.SparkPi" val SPARK_PAGE_RANK_MAIN_CLASS: String = "org.apache.spark.examples.SparkPageRank" diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala similarity index 89% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index 00ef1c5..fb285e7 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -16,17 +16,14 @@ */ package org.apache.spark.deploy.k8s.integrationtest -import java.nio.file.Paths +import java.nio.file.{Path, Paths} import java.util.UUID import scala.collection.mutable import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.client.DefaultKubernetesClient import org.scalatest.concurrent.Eventually -import org.apache.spark.deploy.k8s.integrationtest.constants.SPARK_DISTRO_PATH - private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { val namespace = UUID.randomUUID().toString.replaceAll("-", "") @@ -92,12 +89,14 @@ private[spark] case class SparkAppArguments( private[spark] object SparkAppLauncher extends Logging { - private val SPARK_SUBMIT_EXECUTABLE_DEST = Paths.get(SPARK_DISTRO_PATH.toFile.getAbsolutePath, - "bin", "spark-submit").toFile - - def launch(appArguments: SparkAppArguments, appConf: SparkAppConf, timeoutSecs: Int): Unit = { + def launch( + appArguments: SparkAppArguments, + appConf: SparkAppConf, + timeoutSecs: Int, + sparkHomeDir: Path): Unit = { + val sparkSubmitExecutable = sparkHomeDir.resolve(Paths.get("bin", "spark-submit")) 
logInfo(s"Launching a spark app with arguments $appArguments and conf $appConf") - val commandLine = Array(SPARK_SUBMIT_EXECUTABLE_DEST.getAbsolutePath, + val commandLine = Array(sparkSubmitExecutable.toFile.getAbsolutePath, "--deploy-mode", "cluster", "--class", appArguments.mainClass, "--master", appConf.get("spark.master") diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Logging.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Logging.scala similarity index 100% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Logging.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Logging.scala diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala similarity index 100% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala similarity index 95% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala index fa96b1f..c853706 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala @@ -25,8 +25,6 @@ import org.apache.http.client.utils.URIBuilder private[spark] object SparkExamplesFileServerRunner { - private val fileServerImage = System.getProperty( - "spark.docker.test.fileServerImage", "spark-examples-file-server:latest") private val fileServerExampleJarsDir = Paths.get("docker-file-server", "jars") require( fileServerExampleJarsDir @@ -50,7 +48,10 @@ private[spark] object SparkExamplesFileServerRunner { private val fileServerName = "spark-examples-file-server" def launchServerAndGetUriForExamplesJar( - kubernetesTestComponents: KubernetesTestComponents): URI = { + kubernetesTestComponents: KubernetesTestComponents, + fileServerImageTag: String, + fileServerImageRepo: String): URI = { + val fileServerImage = s"$fileServerImageRepo/spark-examples-file-server:$fileServerImageTag" val podReadinessWatcher = new SparkReadinessWatcher[Pod] Utils.tryWithResource( kubernetesTestComponents diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkReadinessWatcher.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkReadinessWatcher.scala similarity index 100% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkReadinessWatcher.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkReadinessWatcher.scala diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala similarity index 100% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala rename to 
src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala similarity index 92% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala index 345ccc8..5639f97 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/GCE/GCETestBackend.scala @@ -36,8 +36,4 @@ private[spark] class GCETestBackend(val master: String) extends IntegrationTestB override def getKubernetesClient(): DefaultKubernetesClient = { defaultClient } - - override def dockerImageTag(): String = { - return System.getProperty(KUBERNETES_TEST_DOCKER_TAG_SYSTEM_PROPERTY, "latest") - } } diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala similarity index 97% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala index 9c64c64..67f8540 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/IntegrationTestBackend.scala @@ -25,7 +25,6 @@ import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.MinikubeTest private[spark] trait IntegrationTestBackend { def initialize(): Unit def getKubernetesClient: DefaultKubernetesClient - def dockerImageTag(): String def cleanUp(): Unit = {} } diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala similarity index 89% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 8204852..cd1365a 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -43,14 +43,6 @@ private[spark] object Minikube extends Logging { .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) } - def getDockerEnv: Map[String, String] = { - executeMinikube("docker-env", "--shell", "bash") - .filter(_.startsWith("export")) - .map(_.replaceFirst("export ", "").split('=')) - .map(arr => (arr(0), arr(1).replaceAllLiterally("\"", ""))) - .toMap - } - def getKubernetesClient: DefaultKubernetesClient = { val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") @@ -64,10 +56,6 @@ private[spark] object Minikube extends Logging { new DefaultKubernetesClient(kubernetesConf) } - def executeMinikubeSsh(command: String): Unit = { - executeMinikube("ssh", command) - } - 
private def executeMinikube(action: String, args: String*): Seq[String] = { ProcessUtils.executeProcess( Array("minikube", action) ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala similarity index 66% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala index 89db42f..1bb888f 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -19,33 +19,23 @@ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube import io.fabric8.kubernetes.client.DefaultKubernetesClient import org.apache.spark.deploy.k8s.integrationtest.backend.IntegrationTestBackend -import org.apache.spark.deploy.k8s.integrationtest.config._ -import org.apache.spark.deploy.k8s.integrationtest.docker.KubernetesSuiteDockerManager private[spark] object MinikubeTestBackend extends IntegrationTestBackend { private var defaultClient: DefaultKubernetesClient = _ - private val userProvidedDockerImageTag = Option( - System.getProperty(KUBERNETES_TEST_DOCKER_TAG_SYSTEM_PROPERTY)) - private val dockerManager = new KubernetesSuiteDockerManager( - Minikube.getDockerEnv, userProvidedDockerImageTag) override def initialize(): Unit = { val minikubeStatus = Minikube.getMinikubeStatus require(minikubeStatus == MinikubeStatus.RUNNING, - s"Minikube must be running before integration tests can execute. Current status" + - s" is: $minikubeStatus") - dockerManager.buildSparkDockerImages() + s"Minikube must be running to use the Minikube backend for integration tests." 
+ + s" Current status is: $minikubeStatus.") defaultClient = Minikube.getKubernetesClient } override def cleanUp(): Unit = { super.cleanUp() - dockerManager.deleteImages() } - override def getKubernetesClient(): DefaultKubernetesClient = { + override def getKubernetesClient: DefaultKubernetesClient = { defaultClient } - - override def dockerImageTag(): String = dockerManager.dockerImageTag() } diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala similarity index 62% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala index d82a1de..6a4106d 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala @@ -16,9 +16,28 @@ */ package org.apache.spark.deploy.k8s.integrationtest +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files + package object config { val KUBERNETES_TEST_DOCKER_TAG_SYSTEM_PROPERTY = "spark.kubernetes.test.imageDockerTag" val DRIVER_DOCKER_IMAGE = "spark.kubernetes.driver.container.image" val EXECUTOR_DOCKER_IMAGE = "spark.kubernetes.executor.container.image" val INIT_CONTAINER_DOCKER_IMAGE = "spark.kubernetes.initcontainer.container.image" + + def getTestImageTag: String = { + val imageTagFileProp = System.getProperty("spark.kubernetes.test.imageTagFile") + require(imageTagFileProp != null, "Image tag file must be provided in system properties.") + val imageTagFile = new File(imageTagFileProp) + require(imageTagFile.isFile, s"No file found for image tag at ${imageTagFile.getAbsolutePath}.") + Files.toString(imageTagFile, Charsets.UTF_8).trim + } + + def getTestImageRepo: String = { + val imageRepo = System.getProperty("spark.kubernetes.test.imageRepo") + require(imageRepo != null, "Image repo must be provided in system properties.") + imageRepo + } } diff --git a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala similarity index 91% rename from integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala rename to src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala index 9137199..0807a68 100644 --- a/integration-test/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/constants.scala @@ -16,10 +16,7 @@ */ package org.apache.spark.deploy.k8s.integrationtest -import java.nio.file.Paths - package object constants { val MINIKUBE_TEST_BACKEND = "minikube" val GCE_TEST_BACKEND = "gce" - val SPARK_DISTRO_PATH = Paths.get("target", "spark-distro") } From 71ac3e91554659b77c171d132546400c5dd28e96 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 13:31:11 -0800 Subject: [PATCH 03/22] Update documentation --- README.md | 163 +++++++++++++++++++++++------------------------------- 1 file changed, 68 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index 8221265..8f66d75 100644 --- a/README.md +++ b/README.md @@ -8,98 +8,71 @@ title: Spark on Kubernetes Integration Tests Note that the integration test framework is currently being heavily revised and is subject to change. 
Note that currently the integration tests only run with Java 8. -As shorthand to run the tests against any given cluster, you can use the `e2e/runner.sh` script. -The script assumes that you have a functioning Kubernetes cluster (1.6+) with kubectl -configured to access it. The master URL of the currently configured cluster on your -machine can be discovered as follows: - -``` -$ kubectl cluster-info - -Kubernetes master is running at https://xyz -``` - -If you want to use a local [minikube](https://github.com/kubernetes/minikube) cluster, -the minimum tested version is 0.23.0, with the kube-dns addon enabled -and the recommended configuration is 3 CPUs and 4G of memory. There is also a wrapper -script for running on minikube, `e2e/e2e-minikube.sh` for testing the master branch -of the apache/spark repository in specific. - -``` -$ minikube start --memory 4000 --cpus 3 -``` - -If you're using a non-local cluster, you must provide an image repository -which you have write access to, using the `-i` option, in order to store docker images -generated during the test. - -Example usages of the script: - -``` -$ ./e2e/runner.sh -m https://xyz -i docker.io/foxish -d cloud -$ ./e2e/runner.sh -m https://xyz -i test -d minikube -$ ./e2e/runner.sh -m https://xyz -i test -r https://github.com/my-spark/spark -d minikube -$ ./e2e/runner.sh -m https://xyz -i test -r https://github.com/my-spark/spark -b my-branch -d minikube -``` - -# Detailed Documentation - -## Running the tests using maven - -Integration tests firstly require installing [Minikube](https://kubernetes.io/docs/getting-started-guides/minikube/) on -your machine, and for the `Minikube` binary to be on your `PATH`.. Refer to the Minikube documentation for instructions -on how to install it. It is recommended to allocate at least 8 CPUs and 8GB of memory to the Minikube cluster. - -Running the integration tests requires a Spark distribution package tarball that -contains Spark jars, submission clients, etc. You can download a tarball from -http://spark.apache.org/downloads.html. Or, you can create a distribution from -source code using `make-distribution.sh`. For example: - -``` -$ git clone git@github.com:apache/spark.git -$ cd spark -$ ./dev/make-distribution.sh --tgz \ - -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver -``` - -The above command will create a tarball like spark-2.3.0-SNAPSHOT-bin.tgz in the -top-level dir. For more details, see the related section in -[building-spark.md](https://github.com/apache/spark/blob/master/docs/building-spark.md#building-a-runnable-distribution) - - -Once you prepare the tarball, the integration tests can be executed with Maven or -your IDE. Note that when running tests from an IDE, the `pre-integration-test` -phase must be run every time the Spark main code changes. When running tests -from the command line, the `pre-integration-test` phase should automatically be -invoked if the `integration-test` phase is run. - -With Maven, the integration test can be run using the following command: - -``` -$ mvn clean integration-test \ - -Dspark-distro-tgz=spark/spark-2.3.0-SNAPSHOT-bin.tgz -``` - -## Running against an arbitrary cluster - -In order to run against any cluster, use the following: -```sh -$ mvn clean integration-test \ - -Dspark-distro-tgz=spark/spark-2.3.0-SNAPSHOT-bin.tgz \ - -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://https:// - -## Reuse the previous Docker images - -The integration tests build a number of Docker images, which takes some time. 
-By default, the images are built every time the tests run. You may want to skip -re-building those images during development, if the distribution package did not -change since the last run. You can pass the property -`spark.kubernetes.test.imageDockerTag` to the test process and specify the Docker -image tag that is appropriate. -Here is an example: - -``` -$ mvn clean integration-test \ - -Dspark-distro-tgz=spark/spark-2.3.0-SNAPSHOT-bin.tgz \ - -Dspark.kubernetes.test.imageDockerTag=latest -``` +The simplest way to run the integration tests is to install and run Minikube, then run the following: + + build/mvn integration-test + +The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should +run with a minimum of 3 CPUs and 4G of memory: + + minikube start --cpus 3 --memory 4G + +You can download Minikube [here](https://github.com/kubernetes/minikube/releases). + +# Integration test customization + +Configuration of the integration test runtime is done through passing different Java system properties to the Maven +command. The main useful options are outlined below. + +## Use a non-local cluster + +To use your own cluster running in the cloud, set the following: + +* `spark.kubernetes.test.deployMode` to `cloud` to indicate that Minikube will not be used. +* `spark.kubernetes.test.master` to your cluster's externally accessible URL +* `spark.kubernetes.test.imageRepo` to a write-accessible Docker image repository that provides the images for your +cluster. The framework assumes your local Docker client can push to this repository. + +Therefore the command looks like this: + + build/mvn integration-test \ + -Dspark.kubernetes.test.deployMode=cloud \ + -Dspark.kubernetes.test.master=https://example.com:8443/apiserver \ + -Dspark.kubernetes.test.repo=docker.example.com/spark-images + +## Re-using Docker Images + +By default, the test framework will build new Docker images on every test execution. A unique image tag is generated, +and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, set: + +* `spark.kubernetes.test.imageTag` to the tag specified in `target/imageTag.txt` +* `spark.kubernetes.test.skipBuildingImages` to `true` + +Therefore the command looks like this: + + build/mvn integration-test \ + -Dspark.kubernetes.test.imageTag=$(cat target/imageTag.txt) \ + -Dspark.kubernetes.test.skipBuildingImages=true + +## Customizing the Spark Source Code to Test + +By default, the test framework will test the master branch of Spark from [here](https://github.com/apache/spark). You +can specify the following options to test against different source versions of Spark: + +* `spark.kubernetes.test.sparkRepo` to the git or http URI of the Spark git repository to clone +* `spark.kubernetes.test.sparkBranch` to the branch of the repository to build. + +An example: + + build/mvn integration-test \ + -Dspark.kubernetes.test.sparkRepo=https://github.com/apache-spark-on-k8s/spark \ + -Dspark.kubernetes.test.sparkBranch=new-feature + +Additionally, you can use a pre-built Spark distribution. In this case, the repository is not cloned at all, and no +source code has to be compiled. + +* `spark.kubernetes.test.sparkTgz` can be set to a tarball containing the Spark distribution to test. + +When the tests are cloning a repository and building it, the Spark distribution is placed in +`target/spark/spark-.tgz`. 
Reuse this tarball to save a significant amount of time if you are iterating on +the development of these integration tests. From df00060c881662c1f7b8f674faf8e4b3e2dee3f8 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 13:34:55 -0800 Subject: [PATCH 04/22] Remove unnecessary .gitignore entries --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index aed5cf2..261db53 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ .idea/ -spark/ -spark-dist target/ build/*.jar build/apache-maven* From dcd44f477aa751d39eb4fb35e5c48853c9d14e3a Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 13:44:06 -0800 Subject: [PATCH 05/22] Use $IMAGE_TAG instead of $TAG --- scripts/prepare-docker-images.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/prepare-docker-images.sh b/scripts/prepare-docker-images.sh index 36b3d75..389daf7 100755 --- a/scripts/prepare-docker-images.sh +++ b/scripts/prepare-docker-images.sh @@ -46,7 +46,7 @@ cd $UNPACKED_SPARK_TGZ if [[ $DEPLOY_MODE == cloud ]] ; then docker build -t $FILE_SERVER_IMAGE "$FILE_SERVER_BUILD_DIR" - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $TAG build + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build if [[ $IMAGE_REPO == gcr.io* ]] ; then gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG && \ From 03c5977c03e46436df4f7a4bf21ff30a540834dc Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 13:45:08 -0800 Subject: [PATCH 06/22] Don't write image tag file twice --- scripts/prepare-docker-images.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/prepare-docker-images.sh b/scripts/prepare-docker-images.sh index 389daf7..2d2ceea 100755 --- a/scripts/prepare-docker-images.sh +++ b/scripts/prepare-docker-images.sh @@ -28,10 +28,6 @@ fi $SCRIPTS_DIR/write-docker-tag.sh --image-tag $IMAGE_TAG --image-tag-output-file $IMAGE_TAG_OUTPUT_FILE -rm -f $IMAGE_TAG_OUTPUT_FILE -touch $IMAGE_TAG_OUTPUT_FILE -echo -n $IMAGE_TAG > $IMAGE_TAG_OUTPUT_FILE - if [ ! -d "$UNPACKED_SPARK_TGZ" ]; then echo "No unpacked distribution was found at $UNPACKED_SPARK_TGZ. Please run clone-spark.sh and build-spark.sh first." && exit 1; From 016505d961688abf29a4b0e35fc2e4c9d394a1c8 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 15:19:02 -0800 Subject: [PATCH 07/22] Remove using nginx file server --- README.md | 4 +- docker-file-server/.gitignore | 1 - docker-file-server/Dockerfile | 4 - docker-file-server/nginx.conf | 34 ----- scripts/prepare-docker-images.sh | 9 -- .../k8s/integrationtest/KubernetesSuite.scala | 14 +- .../SparkExamplesFileServerRunner.scala | 123 ------------------ 7 files changed, 4 insertions(+), 185 deletions(-) delete mode 100644 docker-file-server/.gitignore delete mode 100644 docker-file-server/Dockerfile delete mode 100644 docker-file-server/nginx.conf delete mode 100644 src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala diff --git a/README.md b/README.md index 8f66d75..4898b17 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ is subject to change. Note that currently the integration tests only run with Ja The simplest way to run the integration tests is to install and run Minikube, then run the following: build/mvn integration-test - + The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. 
Minikube should run with a minimum of 3 CPUs and 4G of memory: @@ -39,7 +39,7 @@ Therefore the command looks like this: -Dspark.kubernetes.test.deployMode=cloud \ -Dspark.kubernetes.test.master=https://example.com:8443/apiserver \ -Dspark.kubernetes.test.repo=docker.example.com/spark-images - + ## Re-using Docker Images By default, the test framework will build new Docker images on every test execution. A unique image tag is generated, diff --git a/docker-file-server/.gitignore b/docker-file-server/.gitignore deleted file mode 100644 index 2723de6..0000000 --- a/docker-file-server/.gitignore +++ /dev/null @@ -1 +0,0 @@ -jars diff --git a/docker-file-server/Dockerfile b/docker-file-server/Dockerfile deleted file mode 100644 index 537748d..0000000 --- a/docker-file-server/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM nginx:alpine - -COPY jars /opt/spark/jars -COPY nginx.conf /etc/nginx/nginx.conf diff --git a/docker-file-server/nginx.conf b/docker-file-server/nginx.conf deleted file mode 100644 index ce0a45f..0000000 --- a/docker-file-server/nginx.conf +++ /dev/null @@ -1,34 +0,0 @@ -user nginx; -worker_processes 1; - -error_log /var/log/nginx/error.log warn; -pid /var/run/nginx.pid; - -events { - worker_connections 1024; -} - -http { - server { - root /opt/spark/jars; - location /ping { - return 200 'pong'; - add_header Content-Type text/plain; - } - } - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - #tcp_nopush on; - - keepalive_timeout 65; - - #gzip on; -} diff --git a/scripts/prepare-docker-images.sh b/scripts/prepare-docker-images.sh index 2d2ceea..78bc6e3 100755 --- a/scripts/prepare-docker-images.sh +++ b/scripts/prepare-docker-images.sh @@ -33,27 +33,18 @@ then echo "No unpacked distribution was found at $UNPACKED_SPARK_TGZ. Please run clone-spark.sh and build-spark.sh first." && exit 1; fi -FILE_SERVER_IMAGE="$IMAGE_REPO/spark-examples-file-server:$IMAGE_TAG" -FILE_SERVER_BUILD_DIR="$TEST_ROOT_DIR/docker-file-server" -rm -rf $FILE_SERVER_BUILD_DIR/jars -mkdir -p $FILE_SERVER_BUILD_DIR/jars -cp $UNPACKED_SPARK_TGZ/examples/jars/spark-examples*.jar $FILE_SERVER_BUILD_DIR/jars/. cd $UNPACKED_SPARK_TGZ if [[ $DEPLOY_MODE == cloud ]] ; then - docker build -t $FILE_SERVER_IMAGE "$FILE_SERVER_BUILD_DIR" $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build if [[ $IMAGE_REPO == gcr.io* ]] ; then gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG && \ - gcloud docker -- push $FILE_SERVER_IMAGE else $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push - docker push $FILE_SERVER_IMAGE fi else # -m option for minikube. 
eval $(minikube docker-env) - docker build -t $FILE_SERVER_IMAGE $FILE_SERVER_BUILD_DIR $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build fi diff --git a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 4f9d7d1..7802b97 100644 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -41,7 +41,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit private var sparkHomeDir: Path = _ private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkAppConf: SparkAppConf = _ - private var remoteExamplesJarUri: URI = _ private var image: String = _ private var containerLocalSparkDistroExamplesJar: String = _ @@ -74,11 +73,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit .set("spark.kubernetes.driver.label.spark-app-locator", APP_LOCATOR_LABEL) .set("spark.kubernetes.executor.label.spark-app-locator", APP_LOCATOR_LABEL) kubernetesTestComponents.createNamespace() - remoteExamplesJarUri = SparkExamplesFileServerRunner - .launchServerAndGetUriForExamplesJar( - kubernetesTestComponents, - getTestImageTag, - getTestImageRepo) } after { @@ -109,10 +103,6 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit runSparkPiAndVerifyCompletion(appArgs = Array("5")) } - test("Run SparkPi using the remote example jar.") { - runSparkPiAndVerifyCompletion(appResource = remoteExamplesJarUri.toString) - } - test("Run SparkPi with custom driver pod name, labels, annotations, and environment variables.") { sparkAppConf .set("spark.kubernetes.driver.pod.name", "spark-integration-spark-pi") @@ -177,8 +167,8 @@ private[spark] class KubernetesSuite extends FunSuite with BeforeAndAfterAll wit createTestSecret() - runSparkPiAndVerifyCompletion( - appResource = remoteExamplesJarUri.toString, + runSparkPageRankAndVerifyCompletion( + appArgs = Array(CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE), driverPodChecker = (driverPod: Pod) => { doBasicDriverPodCheck(driverPod) checkTestSecret(driverPod, withInitContainer = true) diff --git a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala deleted file mode 100644 index c853706..0000000 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkExamplesFileServerRunner.scala +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.k8s.integrationtest - -import java.net.{URI, URL} -import java.nio.file.Paths -import java.util.UUID - -import io.fabric8.kubernetes.api.model.{Endpoints, Pod, Service} -import org.apache.http.client.utils.URIBuilder - -private[spark] object SparkExamplesFileServerRunner { - - private val fileServerExampleJarsDir = Paths.get("docker-file-server", "jars") - require( - fileServerExampleJarsDir - .toFile - .listFiles() - .exists(file => file.getName.startsWith("spark-examples")), - s"No spark-examples jar found in $fileServerExampleJarsDir.") - require( - fileServerExampleJarsDir - .toFile - .listFiles() - .count(file => file.getName.startsWith("spark-examples")) == 1, - s"Multiple spark-examples jars found in $fileServerExampleJarsDir.") - private val fileServerExampleJar = Paths.get("docker-file-server", "jars") - .toFile - .listFiles() - .filter(file => file.getName.startsWith("spark-examples"))(0) - .getName - private val fileServerPodLocatorLabelKey = "fileServerLocator" - private val fileServerPodLocatorLabelValue = UUID.randomUUID().toString.replaceAll("-", "") - private val fileServerName = "spark-examples-file-server" - - def launchServerAndGetUriForExamplesJar( - kubernetesTestComponents: KubernetesTestComponents, - fileServerImageTag: String, - fileServerImageRepo: String): URI = { - val fileServerImage = s"$fileServerImageRepo/spark-examples-file-server:$fileServerImageTag" - val podReadinessWatcher = new SparkReadinessWatcher[Pod] - Utils.tryWithResource( - kubernetesTestComponents - .kubernetesClient - .pods() - .withName(fileServerName) - .watch(podReadinessWatcher)) { _ => - kubernetesTestComponents.kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(fileServerName) - .addToLabels(fileServerPodLocatorLabelKey, fileServerPodLocatorLabelValue) - .endMetadata() - .withNewSpec() - .addNewContainer() - .withName("main") - .withImage(fileServerImage) - .withImagePullPolicy("Never") - .withNewReadinessProbe() - .withNewHttpGet() - .withNewPort(80) - .withPath("/ping") - .endHttpGet() - .endReadinessProbe() - .endContainer() - .endSpec() - .done() - podReadinessWatcher.waitUntilReady() - } - val endpointsReadinessWatcher = new SparkReadinessWatcher[Endpoints] - Utils.tryWithResource( - kubernetesTestComponents - .kubernetesClient - .endpoints() - .withName(fileServerName) - .watch(endpointsReadinessWatcher)) { _ => - kubernetesTestComponents.kubernetesClient.services().createNew() - .withNewMetadata() - .withName(fileServerName) - .endMetadata() - .withNewSpec() - .addToSelector(fileServerPodLocatorLabelKey, fileServerPodLocatorLabelValue) - .addNewPort() - .withName("file-server-port") - .withNewTargetPort(80) - .withPort(80) - .endPort() - .withType("NodePort") - .endSpec() - .done() - endpointsReadinessWatcher.waitUntilReady() - } - val resolvedNodePort = kubernetesTestComponents - .kubernetesClient - .services() - .withName(fileServerName) - .get() - .getSpec - .getPorts - .get(0) - .getNodePort - val masterHostname = URI.create(kubernetesTestComponents.clientConfig.getMasterUrl).getHost - new URIBuilder() - .setHost(masterHostname) - .setPort(resolvedNodePort) - .setScheme("http") - .setPath(s"/$fileServerExampleJar") - .build() - } -} From f2aa748b3838906188327c154e114460194c0c14 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 15:44:02 -0800 Subject: [PATCH 08/22] Remove some lines --- .../org/apache/spark/deploy/k8s/integrationtest/config.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git 
a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala index 6a4106d..a81ef45 100644 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/config.scala @@ -22,11 +22,6 @@ import com.google.common.base.Charsets import com.google.common.io.Files package object config { - val KUBERNETES_TEST_DOCKER_TAG_SYSTEM_PROPERTY = "spark.kubernetes.test.imageDockerTag" - val DRIVER_DOCKER_IMAGE = "spark.kubernetes.driver.container.image" - val EXECUTOR_DOCKER_IMAGE = "spark.kubernetes.executor.container.image" - val INIT_CONTAINER_DOCKER_IMAGE = "spark.kubernetes.initcontainer.container.image" - def getTestImageTag: String = { val imageTagFileProp = System.getProperty("spark.kubernetes.test.imageTagFile") require(imageTagFileProp != null, "Image tag file must be provided in system properties.") From b4f2004108e7678caf28bac0d45c3f92c0e5b316 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 18:40:27 -0800 Subject: [PATCH 09/22] Split building Spark for dev environment from build reactor --- .gitignore | 1 + README.md | 42 ++--- build/mvn | 144 +----------------- .../dev-run-integration-tests.sh | 65 +++++--- pom.xml | 13 -- scripts/build-spark.sh | 33 ---- scripts/clone-spark.sh | 32 ---- scripts/prepare-docker-images.sh | 50 ------ scripts/setup-integration-test-env.sh | 81 +++++++++- scripts/write-docker-tag.sh | 30 ---- 10 files changed, 149 insertions(+), 342 deletions(-) rename scripts/parse-arguments.sh => dev/dev-run-integration-tests.sh (51%) delete mode 100755 scripts/build-spark.sh delete mode 100755 scripts/clone-spark.sh delete mode 100755 scripts/prepare-docker-images.sh delete mode 100755 scripts/write-docker-tag.sh diff --git a/.gitignore b/.gitignore index 261db53..b0e7abb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ build/*.jar build/apache-maven* build/scala* build/zinc* +build/run-mvn *.class *.log *.iml diff --git a/README.md b/README.md index 4898b17..1fc4f7a 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ is subject to change. Note that currently the integration tests only run with Ja The simplest way to run the integration tests is to install and run Minikube, then run the following: - build/mvn integration-test + dev/dev-run-integration-tests.sh The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should run with a minimum of 3 CPUs and 4G of memory: @@ -28,50 +28,50 @@ command. The main useful options are outlined below. To use your own cluster running in the cloud, set the following: -* `spark.kubernetes.test.deployMode` to `cloud` to indicate that Minikube will not be used. -* `spark.kubernetes.test.master` to your cluster's externally accessible URL -* `spark.kubernetes.test.imageRepo` to a write-accessible Docker image repository that provides the images for your -cluster. The framework assumes your local Docker client can push to this repository. +* `--deploy-mode cloud` to indicate that the test is connecting to a remote cluster instead of Minikube, +* `--spark-master ` - set `` to the externally accessible Kubernetes cluster URL, +* `--image-repo ` - set `` to a write-accessible Docker image repository that provides the images for your cluster. The framework assumes your local Docker client can push to this repository. 
Therefore the command looks like this: - build/mvn integration-test \ - -Dspark.kubernetes.test.deployMode=cloud \ - -Dspark.kubernetes.test.master=https://example.com:8443/apiserver \ - -Dspark.kubernetes.test.repo=docker.example.com/spark-images + dev/dev-run-integration-tests.sh \ + --deploy-mode cloud \ + --spark-master https://example.com:8443/apiserver \ + --image-repo docker.example.com/spark-images ## Re-using Docker Images By default, the test framework will build new Docker images on every test execution. A unique image tag is generated, -and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, set: +and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, pass these arguments: -* `spark.kubernetes.test.imageTag` to the tag specified in `target/imageTag.txt` -* `spark.kubernetes.test.skipBuildingImages` to `true` +* `--image-tag ` - set `` to the tag specified in `target/imageTag.txt` +* `--skip-building-images` to inform the framework to not build the images. Therefore the command looks like this: - build/mvn integration-test \ - -Dspark.kubernetes.test.imageTag=$(cat target/imageTag.txt) \ - -Dspark.kubernetes.test.skipBuildingImages=true + dev/dev-run-integration-tests.sh \ + --image-tag $(cat target/imageTag.txt) \ + --skip-building-images ## Customizing the Spark Source Code to Test By default, the test framework will test the master branch of Spark from [here](https://github.com/apache/spark). You can specify the following options to test against different source versions of Spark: -* `spark.kubernetes.test.sparkRepo` to the git or http URI of the Spark git repository to clone -* `spark.kubernetes.test.sparkBranch` to the branch of the repository to build. +* `--spark-repo ` - set `` to the git or http URI of the Spark git repository to clone +* `--spark-branch ` - set `` to the branch of the repository to build. + An example: - build/mvn integration-test \ - -Dspark.kubernetes.test.sparkRepo=https://github.com/apache-spark-on-k8s/spark \ - -Dspark.kubernetes.test.sparkBranch=new-feature + dev/dev-run-integration-tests.sh \ + --spark-repo https://github.com/apache-spark-on-k8s/spark \ + --spark-branch new-feature Additionally, you can use a pre-built Spark distribution. In this case, the repository is not cloned at all, and no source code has to be compiled. -* `spark.kubernetes.test.sparkTgz` can be set to a tarball containing the Spark distribution to test. +* `--spark-tgz ` - set ` to point to a tarball containing the Spark distribution to test. When the tests are cloning a repository and building it, the Spark distribution is placed in `target/spark/spark-.tgz`. Reuse this tarball to save a significant amount of time if you are iterating on diff --git a/build/mvn b/build/mvn index c6051ec..85738d0 100755 --- a/build/mvn +++ b/build/mvn @@ -17,142 +17,8 @@ # limitations under the License. 
# -# Determine the current working directory -_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -# Preserve the calling directory -_CALLING_DIR="$(pwd)" -# Options used during compilation -_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" - -# Installs any application tarball given a URL, the expected tarball name, -# and, optionally, a checkable binary path to determine if the binary has -# already been installed -## Arg1 - URL -## Arg2 - Tarball Name -## Arg3 - Checkable Binary -install_app() { - local remote_tarball="$1/$2" - local local_tarball="${_DIR}/$2" - local binary="${_DIR}/$3" - - # setup `curl` and `wget` silent options if we're running on Jenkins - local curl_opts="-L" - local wget_opts="" - if [ -n "$AMPLAB_JENKINS" ]; then - curl_opts="-s ${curl_opts}" - wget_opts="--quiet ${wget_opts}" - else - curl_opts="--progress-bar ${curl_opts}" - wget_opts="--progress=bar:force ${wget_opts}" - fi - - if [ -z "$3" -o ! -f "$binary" ]; then - # check if we already have the tarball - # check if we have curl installed - # download application - [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \ - echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \ - curl ${curl_opts} "${remote_tarball}" > "${local_tarball}" - # if the file still doesn't exist, lets try `wget` and cross our fingers - [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \ - echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \ - wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}" - # if both were unsuccessful, exit - [ ! -f "${local_tarball}" ] && \ - echo -n "ERROR: Cannot download $2 with cURL or wget; " && \ - echo "please install manually and try again." && \ - exit 2 - cd "${_DIR}" && tar -xzf "$2" - rm -rf "$local_tarball" - fi -} - -# Determine the Maven version from the root pom.xml file and -# install maven under the build/ folder if needed. -install_mvn() { - local MVN_VERSION=`grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` - echo $MVN_VERSION - MVN_BIN="$(command -v mvn)" - if [ "$MVN_BIN" ]; then - local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')" - fi - # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers - function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; } - if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then - local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='} - - install_app \ - "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \ - "apache-maven-${MVN_VERSION}-bin.tar.gz" \ - "apache-maven-${MVN_VERSION}/bin/mvn" - - MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn" - fi -} - -# Install zinc under the build/ folder -install_zinc() { - local zinc_path="zinc-0.3.15/bin/zinc" - [ ! 
-f "${_DIR}/${zinc_path}" ] && ZINC_INSTALL_FLAG=1 - local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.typesafe.com} - - install_app \ - "${TYPESAFE_MIRROR}/zinc/0.3.15" \ - "zinc-0.3.15.tgz" \ - "${zinc_path}" - ZINC_BIN="${_DIR}/${zinc_path}" -} - -# Determine the Scala version from the root pom.xml file, set the Scala URL, -# and, with that, download the specific version of Scala necessary under -# the build/ folder -install_scala() { - # determine the Scala version used in Spark - local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` - local scala_bin="${_DIR}/scala-${scala_version}/bin/scala" - local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.typesafe.com} - - install_app \ - "${TYPESAFE_MIRROR}/scala/${scala_version}" \ - "scala-${scala_version}.tgz" \ - "scala-${scala_version}/bin/scala" - - SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar" - SCALA_LIBRARY="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-library.jar" -} - -# Setup healthy defaults for the Zinc port if none were provided from -# the environment -ZINC_PORT=${ZINC_PORT:-"3030"} - -# Remove `--force` for backward compatibility. -if [ "$1" == "--force" ]; then - echo "WARNING: '--force' is deprecated and ignored." - shift -fi - -# Install the proper version of Scala, Zinc and Maven for the build -install_zinc -install_scala -install_mvn - -# Reset the current working directory -cd "${_CALLING_DIR}" - -# Now that zinc is ensured to be installed, check its status and, if its -# not running or just installed, start it -if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`"${ZINC_BIN}" -status -port ${ZINC_PORT}`" ]; then - export ZINC_OPTS=${ZINC_OPTS:-"$_COMPILE_JVM_OPTS"} - "${ZINC_BIN}" -shutdown -port ${ZINC_PORT} - "${ZINC_BIN}" -start -port ${ZINC_PORT} \ - -scala-compiler "${SCALA_COMPILER}" \ - -scala-library "${SCALA_LIBRARY}" &>/dev/null -fi - -# Set any `mvn` options if not already present -export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"} - -echo "Using \`mvn\` from path: $MVN_BIN" 1>&2 - -# Last, call the `mvn` command as usual -${MVN_BIN} -DzincPort=${ZINC_PORT} "$@" +BUILD_DIR=$(dirname $0) +MVN_RUNNER=$BUILD_DIR/run-mvn +curl -s https://raw.githubusercontent.com/apache/spark/master/build/mvn > $MVN_RUNNER +chmod +x $MVN_RUNNER +source $MVN_RUNNER diff --git a/scripts/parse-arguments.sh b/dev/dev-run-integration-tests.sh similarity index 51% rename from scripts/parse-arguments.sh rename to dev/dev-run-integration-tests.sh index 91d9c53..edead47 100755 --- a/scripts/parse-arguments.sh +++ b/dev/dev-run-integration-tests.sh @@ -17,18 +17,17 @@ # limitations under the License. 
# -set -ex TEST_ROOT_DIR=$(git rev-parse --show-toplevel) BRANCH="master" SPARK_REPO="https://github.com/apache/spark" SPARK_REPO_LOCAL_DIR="$TEST_ROOT_DIR/target/spark" -UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" -IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" DEPLOY_MODE=minikube IMAGE_REPO="docker.io/kubespark" SKIP_BUILDING_IMAGES=false SPARK_TGZ="N/A" IMAGE_TAG="N/A" +MAVEN_ARGS=() +SPARK_MASTER= # Parse arguments while (( "$#" )); do @@ -41,14 +40,6 @@ while (( "$#" )); do SPARK_REPO="$2" shift ;; - --spark-repo-local-dir) - SPARK_REPO_LOCAL_DIR="$2" - shift - ;; - --unpacked-spark-tgz) - UNPACKED_SPARK_TGZ="$2" - shift - ;; --image-repo) IMAGE_REPO="$2" shift @@ -57,10 +48,6 @@ while (( "$#" )); do IMAGE_TAG="$2" shift ;; - --image-tag-output-file) - IMAGE_TAG_OUTPUT_FILE="$2" - shift - ;; --deploy-mode) DEPLOY_MODE="$2" shift @@ -69,10 +56,17 @@ while (( "$#" )); do SPARK_TGZ="$2" shift ;; - --skip-building-images) - SKIP_BUILDING_IMAGES="$2" + --maven-args) + MAVEN_ARGS=("$2") shift ;; + --skip-building-images) + SKIP_BUILDING_IMAGES=true + ;; + --spark-master) + SPARK_MASTER="$2" + shift + ;; *) break ;; @@ -80,3 +74,44 @@ while (( "$#" )); do shift done +if [[ $SPARK_TGZ == "N/A" ]]; +then + echo "Cloning $SPARK_REPO into $SPARK_REPO_LOCAL_DIR and checking out $BRANCH." + + # clone spark distribution if needed. + if [ -d "$SPARK_REPO_LOCAL_DIR" ]; + then + (cd $SPARK_REPO_LOCAL_DIR && git fetch origin $BRANCH); + else + mkdir -p $SPARK_REPO_LOCAL_DIR; + git clone -b $BRANCH --single-branch $SPARK_REPO $SPARK_REPO_LOCAL_DIR; + fi + cd $SPARK_REPO_LOCAL_DIR + git checkout -B $BRANCH origin/$BRANCH + ./dev/make-distribution.sh --tgz -Phadoop-2.7 -Pkubernetes -DskipTests; + SPARK_TGZ=$(find $SPARK_REPO_LOCAL_DIR -name spark-*.tgz) + echo "Built Spark TGZ at $SPARK_TGZ". + cd - +fi + +cd $TEST_ROOT_DIR + +if [ -z $SPARK_MASTER ]; +then + build/mvn integration-test \ + -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ + -Dspark.kubernetes.test.skipBuildingImages=$SKIP_BUILDING_IMAGES \ + -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ + -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ + -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ + "${MAVEN_ARGS[@]/#/-}"; +else + build/mvn integration-test \ + -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ + -Dspark.kubernetes.test.skipBuildingImages=$SKIP_BUILDING_IMAGES \ + -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ + -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ + -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ + -Dspark.kubernetes.test.master=$SPARK_MASTER \ + "${MAVEN_ARGS[@]/#/-}"; +fi diff --git a/pom.xml b/pom.xml index 2fae459..722a908 100644 --- a/pom.xml +++ b/pom.xml @@ -40,15 +40,11 @@ 1.0 1.7.24 kubernetes-integration-tests - N/A - ${project.build.directory}/spark ${project.build.directory}/spark-dist-unpacked - master N/A ${project.build.directory}/imageTag.txt minikube docker.io/kubespark - https://github.com/apache/spark false @@ -142,15 +138,6 @@ scripts/setup-integration-test-env.sh - --spark-branch - ${spark.kubernetes.test.sparkBranch} - - --spark-repo - ${spark.kubernetes.test.sparkRepo} - - --spark-repo-local-dir - ${spark.kubernetes.test.sparkRepo.localDir} - --unpacked-spark-tgz ${spark.kubernetes.test.unpackSparkDir} diff --git a/scripts/build-spark.sh b/scripts/build-spark.sh deleted file mode 100755 index 9cf733b..0000000 --- a/scripts/build-spark.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements.
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -ex -SCRIPTS_DIR=$(dirname $0) -source "$SCRIPTS_DIR/parse-arguments.sh" "$@" - -cd $SPARK_REPO_LOCAL_DIR -rm -rf $UNPACKED_SPARK_TGZ -mkdir -p $UNPACKED_SPARK_TGZ -if [[ $SPARK_TGZ == "N/A" ]]; -then - ./dev/make-distribution.sh --tgz -Phadoop-2.7 -Pkubernetes -DskipTests; - tar -xzvf $SPARK_REPO_LOCAL_DIR/spark-*.tgz --strip-components=1 -C $UNPACKED_SPARK_TGZ; -else - tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ -fi; diff --git a/scripts/clone-spark.sh b/scripts/clone-spark.sh deleted file mode 100755 index eb0c7c9..0000000 --- a/scripts/clone-spark.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -set -ex -SCRIPTS_DIR=$(dirname $0) -source "$SCRIPTS_DIR/parse-arguments.sh" "$@" -# clone spark distribution if needed. -if [ -d "$SPARK_REPO_LOCAL_DIR" ]; -then - (cd $SPARK_REPO_LOCAL_DIR && git fetch origin $BRANCH); -else - git clone -b $BRANCH --single-branch $SPARK_REPO $SPARK_REPO_LOCAL_DIR; -fi - -cd $SPARK_REPO_LOCAL_DIR -git checkout -B $BRANCH origin/$BRANCH diff --git a/scripts/prepare-docker-images.sh b/scripts/prepare-docker-images.sh deleted file mode 100755 index 78bc6e3..0000000 --- a/scripts/prepare-docker-images.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -set -ex -SCRIPTS_DIR=$(dirname $0) -source "$SCRIPTS_DIR/parse-arguments.sh" "$@" - -if [[ $IMAGE_TAG == "N/A" ]]; -then - IMAGE_TAG=$(uuidgen); -fi - -$SCRIPTS_DIR/write-docker-tag.sh --image-tag $IMAGE_TAG --image-tag-output-file $IMAGE_TAG_OUTPUT_FILE - -if [ ! -d "$UNPACKED_SPARK_TGZ" ]; -then - echo "No unpacked distribution was found at $UNPACKED_SPARK_TGZ. Please run clone-spark.sh and build-spark.sh first." && exit 1; -fi - -cd $UNPACKED_SPARK_TGZ -if [[ $DEPLOY_MODE == cloud ]] ; -then - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build - if [[ $IMAGE_REPO == gcr.io* ]] ; - then - gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG && \ - else - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push - fi -else - # -m option for minikube. - eval $(minikube docker-env) - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build -fi diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh index e327ac1..3f9c23c 100755 --- a/scripts/setup-integration-test-env.sh +++ b/scripts/setup-integration-test-env.sh @@ -16,19 +16,86 @@ # See the License for the specific language governing permissions and # limitations under the License. # +TEST_ROOT_DIR=$(git rev-parse --show-toplevel) +UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" +IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" +DEPLOY_MODE=minikube +IMAGE_REPO="docker.io/kubespark" +SKIP_BUILDING_IMAGES=false +SPARK_TGZ="N/A" +IMAGE_TAG="N/A" -SCRIPTS_DIR=$(dirname $0) -source $SCRIPTS_DIR/parse-arguments.sh "$@" +# Parse arguments +while (( "$#" )); do + case $1 in + --unpacked-spark-tgz) + UNPACKED_SPARK_TGZ="$2" + shift + ;; + --image-repo) + IMAGE_REPO="$2" + shift + ;; + --image-tag) + IMAGE_TAG="$2" + shift + ;; + --image-tag-output-file) + IMAGE_TAG_OUTPUT_FILE="$2" + shift + ;; + --deploy-mode) + DEPLOY_MODE="$2" + shift + ;; + --spark-tgz) + SPARK_TGZ="$2" + shift + ;; + --skip-building-images) + SKIP_BUILDING_IMAGES="$2" + shift + ;; + *) + break + ;; + esac + shift +done if [[ $SPARK_TGZ == "N/A" ]]; then - $SCRIPTS_DIR/clone-spark.sh "$@"; + echo "Must specify a Spark tarball to build Docker images against with --spark-tgz." && exit 1; +fi + +rm -rf $UNPACKED_SPARK_TGZ +mkdir -p $UNPACKED_SPARK_TGZ +tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; + +if [[ $IMAGE_TAG == "N/A" ]]; +then + IMAGE_TAG=$(uuidgen); fi -$SCRIPTS_DIR/build-spark.sh "$@" if [[ $SKIP_BUILDING_IMAGES == false ]] ; then - $SCRIPTS_DIR/prepare-docker-images.sh "$@"; -else - $SCRIPTS_DIR/write-docker-tag.sh "$@"; + cd $UNPACKED_SPARK_TGZ + if [[ $DEPLOY_MODE == cloud ]] ; + then + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build + if [[ $IMAGE_REPO == gcr.io* ]] ; + then + gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG + else + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push + fi + else + # -m option for minikube. + eval $(minikube docker-env) + $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build + fi + cd - fi + +rm -f $IMAGE_TAG_OUTPUT_FILE +echo -n $IMAGE_TAG > $IMAGE_TAG_OUTPUT_FILE diff --git a/scripts/write-docker-tag.sh b/scripts/write-docker-tag.sh deleted file mode 100755 index f1d17c2..0000000 --- a/scripts/write-docker-tag.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-SCRIPTS_DIR=$(dirname $0)
-source $SCRIPTS_DIR/parse-arguments.sh "$@"
-
-if [[ $IMAGE_TAG == "N/A" ]];
-then
-  echo "Image tag must be specified via --image-tag to write.";
-fi
-
-rm -f $IMAGE_TAG_OUTPUT_FILE
-touch $IMAGE_TAG_OUTPUT_FILE
-echo -n $IMAGE_TAG > $IMAGE_TAG_OUTPUT_FILE
From dc97080895de9ecacdf03a3901f9cbe8b0c0d0ef Mon Sep 17 00:00:00 2001
From: mcheah
Date: Fri, 12 Jan 2018 18:54:36 -0800
Subject: [PATCH 10/22] Small docs fix

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 1fc4f7a..3e0c939 100644
--- a/README.md
+++ b/README.md
@@ -21,8 +21,7 @@ You can download Minikube [here](https://github.com/kubernetes/minikube/releases)
 # Integration test customization
-Configuration of the integration test runtime is done through passing different Java system properties to the Maven
-command. The main useful options are outlined below.
+Configuration of the integration test runtime is done by passing arguments to the test script. The main useful options are outlined below.
 ## Use a non-local cluster
From f6894151b0c2f185c1322c2a0ca29abb4598485f Mon Sep 17 00:00:00 2001
From: mcheah
Date: Fri, 12 Jan 2018 18:55:33 -0800
Subject: [PATCH 11/22] Docs formatting fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3e0c939..8a667ab 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ An example:
 Additionally, you can use a pre-built Spark distribution. In this case, the repository is not cloned at all, and no
 source code has to be compiled.
-* `--spark-tgz <path-to-tgz>` - set `<path-to-tgz> to point to a tarball containing the Spark distribution to test.
+* `--spark-tgz <path-to-tgz>` - set `<path-to-tgz>` to point to a tarball containing the Spark distribution to test.
 When the tests are cloning a repository and building it, the Spark distribution is placed in
 `target/spark/spark-<VERSION>.tgz`. Reuse this tarball to save a significant amount of time if you are iterating on
From 81285c12a05e296ef1c8970dcc01a141279ce2a9 Mon Sep 17 00:00:00 2001
From: mcheah
Date: Fri, 12 Jan 2018 19:00:26 -0800
Subject: [PATCH 12/22] Spark TGZ can be empty instead of N/A, throw an error if not provided.
---
 scripts/setup-integration-test-env.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh
index 3f9c23c..653e895 100755
--- a/scripts/setup-integration-test-env.sh
+++ b/scripts/setup-integration-test-env.sh
@@ -22,8 +22,8 @@ IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt"
 DEPLOY_MODE=minikube
 IMAGE_REPO="docker.io/kubespark"
 SKIP_BUILDING_IMAGES=false
-SPARK_TGZ="N/A"
 IMAGE_TAG="N/A"
+SPARK_TGZ=
 # Parse arguments
 while (( "$#" )); do
@@ -63,7 +63,7 @@ while (( "$#" )); do
   shift
 done
-if [[ $SPARK_TGZ == "N/A" ]];
+if [ -z $SPARK_TGZ ];
 then
   echo "Must specify a Spark tarball to build Docker images against with --spark-tgz." && exit 1;
 fi
From d7b44d1f0fa9e60eeab4f2eed8800a5d6198e53f Mon Sep 17 00:00:00 2001
From: mcheah
Date: Fri, 12 Jan 2018 19:08:10 -0800
Subject: [PATCH 13/22] Remove extraneous --skip-building-docker-images flag.

---
 README.md | 17 +++++++----------
 dev/dev-run-integration-tests.sh | 6 ------
 scripts/setup-integration-test-env.sh | 9 ---------
 3 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 8a667ab..27fb647 100644
--- a/README.md
+++ b/README.md
@@ -41,16 +41,14 @@ Therefore the command looks like this:
 ## Re-using Docker Images
 By default, the test framework will build new Docker images on every test execution. A unique image tag is generated,
-and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, pass these arguments:
+and it is written to file at `target/imageTag.txt`. To reuse the images built in a previous run, or to use a Docker image tag
+that you have already built by other means, pass the tag to the test script:
-* `--image-tag <tag>` - set `<tag>` to the tag specified in `target/imageTag.txt`
-* `--skip-building-images` to inform the framework to not build the images.
+    dev/dev-run-integration-tests.sh --image-tag <tag>
-Therefore the command looks like this:
+or, to reuse the images that the test framework built in a previous run:
-    dev/dev-run-integration-tests.sh \
-      --image-tag $(cat target/imageTag.txt) \
-      --skip-building-images
+    dev/dev-run-integration-tests.sh --image-tag $(cat target/imageTag.txt)
 ## Customizing the Spark Source Code to Test
@@ -72,6 +70,5 @@ source code has to be compiled.
 * `--spark-tgz <path-to-tgz>` - set `<path-to-tgz>` to point to a tarball containing the Spark distribution to test.
-When the tests are cloning a repository and building it, the Spark distribution is placed in
-`target/spark/spark-<VERSION>.tgz`. Reuse this tarball to save a significant amount of time if you are iterating on
-the development of these integration tests.
+When the tests are cloning a repository and building it, the Spark distribution is placed in `target/spark/spark-<VERSION>.tgz`.
+Reuse this tarball to save a significant amount of time if you are iterating on the development of these integration tests.
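To make the options shown in this README hunk concrete, a combined invocation might look like the sketch below; the tarball path is a placeholder for illustration, not a file produced by this patch series:

    # Illustrative only: reuse the image tag written by a previous run and skip
    # rebuilding Spark by pointing at an already-built distribution tarball.
    dev/dev-run-integration-tests.sh \
      --image-tag $(cat target/imageTag.txt) \
      --spark-tgz /path/to/spark-dist.tgz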
diff --git a/dev/dev-run-integration-tests.sh b/dev/dev-run-integration-tests.sh index edead47..7bca061 100755 --- a/dev/dev-run-integration-tests.sh +++ b/dev/dev-run-integration-tests.sh @@ -23,7 +23,6 @@ SPARK_REPO="https://github.com/apache/spark" SPARK_REPO_LOCAL_DIR="$TEST_ROOT_DIR/target/spark" DEPLOY_MODE=minikube IMAGE_REPO="docker.io/kubespark" -SKIP_BUILDING_IMAGES=false SPARK_TGZ="N/A" IMAGE_TAG="N/A" MAVEN_ARGS=() @@ -60,9 +59,6 @@ while (( "$#" )); do MAVEN_ARGS=("$2") shift ;; - --skip-building-images) - SKIP_BUILDING_IMAGES=true - ;; *) break ;; @@ -96,7 +92,6 @@ if [ -z $SPARK_MASTER ]; then build/mvn integration-test \ -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ - -Dspark.kubernetes.test.skipBuildingImages=$SKIP_BUILDING_IMAGES \ -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ @@ -104,7 +99,6 @@ then else build/mvn integration-test \ -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ - -Dspark.kubernetes.test.skipBuildingImages=$SKIP_BUILDING_IMAGES \ -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh index 653e895..f41422c 100755 --- a/scripts/setup-integration-test-env.sh +++ b/scripts/setup-integration-test-env.sh @@ -21,7 +21,6 @@ UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" DEPLOY_MODE=minikube IMAGE_REPO="docker.io/kubespark" -SKIP_BUILDING_IMAGES=false IMAGE_TAG="N/A" SPARK_TGZ= @@ -52,10 +51,6 @@ while (( "$#" )); do SPARK_TGZ="$2" shift ;; - --skip-building-images) - SKIP_BUILDING_IMAGES="$2" - shift - ;; *) break ;; @@ -75,10 +70,6 @@ tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; if [[ $IMAGE_TAG == "N/A" ]]; then IMAGE_TAG=$(uuidgen); -fi - -if [[ $SKIP_BUILDING_IMAGES == false ]] ; -then cd $UNPACKED_SPARK_TGZ if [[ $DEPLOY_MODE == cloud ]] ; then From 0882db7b20b1dc8cb898cf2581565d64ddc64a00 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 19:09:20 -0800 Subject: [PATCH 14/22] Switch back to using the N/A placeholder --- scripts/setup-integration-test-env.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh index f41422c..cd69b2a 100755 --- a/scripts/setup-integration-test-env.sh +++ b/scripts/setup-integration-test-env.sh @@ -22,7 +22,7 @@ IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" DEPLOY_MODE=minikube IMAGE_REPO="docker.io/kubespark" IMAGE_TAG="N/A" -SPARK_TGZ= +SPARK_TGZ="N/A" # Parse arguments while (( "$#" )); do @@ -58,7 +58,7 @@ while (( "$#" )); do shift done -if [ -z $SPARK_TGZ ]; +if [[ $SPARK_TGZ == "N/A" ]]; then echo "Must specify a Spark tarball to build Docker images against with --spark-tgz." 
&& exit 1; fi From d69787b8a2b40a93e549bd483e409ba569b3db37 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 19:11:30 -0800 Subject: [PATCH 15/22] Remove extraneous code --- pom.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pom.xml b/pom.xml index 722a908..cdc987a 100644 --- a/pom.xml +++ b/pom.xml @@ -45,7 +45,6 @@ ${project.build.directory}/imageTag.txt minikube docker.io/kubespark - false jar @@ -155,9 +154,6 @@ --spark-tgz ${spark.kubernetes.test.sparkTgz} - - --skip-building-images - ${spark.kubernetes.test.skipBuildingImages} From 7a4a5d4ccf8d5ff148d29f6ee25e1b5b75320830 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 12 Jan 2018 19:33:39 -0800 Subject: [PATCH 16/22] Remove maven args because they don't work --- README.md | 2 +- dev/dev-run-integration-tests.sh | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/README.md b/README.md index 27fb647..54d2a94 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The simplest way to run the integration tests is to install and run Minikube, th The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should run with a minimum of 3 CPUs and 4G of memory: - minikube start --cpus 3 --memory 4G + minikube start --cpus 3 --memory 4096 You can download Minikube [here](https://github.com/kubernetes/minikube/releases). diff --git a/dev/dev-run-integration-tests.sh b/dev/dev-run-integration-tests.sh index 7bca061..d2c4bd1 100755 --- a/dev/dev-run-integration-tests.sh +++ b/dev/dev-run-integration-tests.sh @@ -25,7 +25,6 @@ DEPLOY_MODE=minikube IMAGE_REPO="docker.io/kubespark" SPARK_TGZ="N/A" IMAGE_TAG="N/A" -MAVEN_ARGS=() SPARK_MASTER= # Parse arguments @@ -55,10 +54,6 @@ while (( "$#" )); do SPARK_TGZ="$2" shift ;; - --maven-args) - MAVEN_ARGS=("$2") - shift - ;; *) break ;; @@ -95,7 +90,6 @@ then -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ - "${MAVEN_ARGS[@]/#/-}"; else build/mvn integration-test \ -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ @@ -103,5 +97,4 @@ else -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ -Dspark.kubernetes.test.master=$SPARK_MASTER \ - "${MAVEN_ARGS[@]/#/-}"; fi From b33e962785cb07c6fe2b41c6a2d2324206d2b66e Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 15 Jan 2018 11:04:44 -0800 Subject: [PATCH 17/22] Fix scripts --- dev/dev-run-integration-tests.sh | 4 ++-- scripts/setup-integration-test-env.sh | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dev/dev-run-integration-tests.sh b/dev/dev-run-integration-tests.sh index d2c4bd1..81600ce 100755 --- a/dev/dev-run-integration-tests.sh +++ b/dev/dev-run-integration-tests.sh @@ -89,12 +89,12 @@ then -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ - -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ + -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE; else build/mvn integration-test \ -Dspark.kubernetes.test.sparkTgz=$SPARK_TGZ \ -Dspark.kubernetes.test.imageTag=$IMAGE_TAG \ -Dspark.kubernetes.test.imageRepo=$IMAGE_REPO \ -Dspark.kubernetes.test.deployMode=$DEPLOY_MODE \ - -Dspark.kubernetes.test.master=$SPARK_MASTER \ + -Dspark.kubernetes.test.master=$SPARK_MASTER; fi diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh index cd69b2a..b39e865 100755 --- a/scripts/setup-integration-test-env.sh +++ 
b/scripts/setup-integration-test-env.sh @@ -82,7 +82,6 @@ then fi else # -m option for minikube. - eval $(minikube docker-env) $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build fi cd - From 555a8980c5e8c8a1f0d7b69ed26c7823403b67bf Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 15 Jan 2018 11:13:11 -0800 Subject: [PATCH 18/22] Don't get Maven if it's already there --- build/mvn | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/build/mvn b/build/mvn index 85738d0..87e5b58 100755 --- a/build/mvn +++ b/build/mvn @@ -18,7 +18,12 @@ # BUILD_DIR=$(dirname $0) + MVN_RUNNER=$BUILD_DIR/run-mvn -curl -s https://raw.githubusercontent.com/apache/spark/master/build/mvn > $MVN_RUNNER -chmod +x $MVN_RUNNER + +if [ ! -f $MVN_RUNNER ]; +then + curl -s --progress-bar https://raw.githubusercontent.com/apache/spark/master/build/mvn > $MVN_RUNNER + chmod +x $MVN_RUNNER +fi source $MVN_RUNNER From 01052979d4d14cfc60520779b8b3e90b301d0527 Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 15 Jan 2018 11:20:46 -0800 Subject: [PATCH 19/22] Put quotes everywhere --- dev/dev-run-integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/dev-run-integration-tests.sh b/dev/dev-run-integration-tests.sh index 81600ce..2eb8327 100755 --- a/dev/dev-run-integration-tests.sh +++ b/dev/dev-run-integration-tests.sh @@ -21,7 +21,7 @@ TEST_ROOT_DIR=$(git rev-parse --show-toplevel) BRANCH="master" SPARK_REPO="https://github.com/apache/spark" SPARK_REPO_LOCAL_DIR="$TEST_ROOT_DIR/target/spark" -DEPLOY_MODE=minikube +DEPLOY_MODE="minikube" IMAGE_REPO="docker.io/kubespark" SPARK_TGZ="N/A" IMAGE_TAG="N/A" From f10c3b56651e64ec6e84830907efd10d5a226bfa Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 16 Jan 2018 11:16:17 -0800 Subject: [PATCH 20/22] Minor formatting --- scripts/setup-integration-test-env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setup-integration-test-env.sh b/scripts/setup-integration-test-env.sh index b39e865..ccfb8e7 100755 --- a/scripts/setup-integration-test-env.sh +++ b/scripts/setup-integration-test-env.sh @@ -19,7 +19,7 @@ TEST_ROOT_DIR=$(git rev-parse --show-toplevel) UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" -DEPLOY_MODE=minikube +DEPLOY_MODE="minikube" IMAGE_REPO="docker.io/kubespark" IMAGE_TAG="N/A" SPARK_TGZ="N/A" From 87f7fb67906589a1d67fdb9d92d511b857a04fd1 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 16 Jan 2018 11:29:25 -0800 Subject: [PATCH 21/22] Hard set Minikube binary location. 
--- .../integrationtest/backend/minikube/Minikube.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index cd1365a..94bc9b7 100644 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube +import java.io.File import java.nio.file.Paths import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -24,6 +25,8 @@ import org.apache.spark.deploy.k8s.integrationtest.{Logging, ProcessUtils} // TODO support windows private[spark] object Minikube extends Logging { + + private val MINIKUBE_EXECUTABLE_LOCATION = "/usr/local/bin/minikube" private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 def getMinikubeIp: String = { @@ -57,8 +60,15 @@ private[spark] object Minikube extends Logging { } private def executeMinikube(action: String, args: String*): Seq[String] = { + val minikubeBinary = new File(MINIKUBE_EXECUTABLE_LOCATION) + require( + minikubeBinary.isFile, + s"Minikube binary was not found at $MINIKUBE_EXECUTABLE_LOCATION.") + require( + minikubeBinary.canExecute, + s"Minikube binary at $MINIKUBE_EXECUTABLE_LOCATION is not executable.") ProcessUtils.executeProcess( - Array("minikube", action) ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) + Array("/usr/local/bin/minikube", action) ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) } } From e9035aa6415c4ae8e0112a1ba5ab46543c3a4545 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 16 Jan 2018 12:54:24 -0800 Subject: [PATCH 22/22] Run Minikube from bash -c --- .../deploy/k8s/integrationtest/ProcessUtils.scala | 2 +- .../integrationtest/backend/minikube/Minikube.scala | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala index e9f143c..aa6425d 100644 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala @@ -39,6 +39,6 @@ object ProcessUtils extends Logging { assert(proc.waitFor(timeout, TimeUnit.SECONDS), s"Timed out while executing ${fullCommand.mkString(" ")}") assert(proc.exitValue == 0, s"Failed to execute ${fullCommand.mkString(" ")}") - outputLines.toSeq + outputLines } } diff --git a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 94bc9b7..7145d85 100644 --- a/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -26,7 +26,6 @@ import org.apache.spark.deploy.k8s.integrationtest.{Logging, ProcessUtils} // TODO support windows private[spark] object Minikube extends Logging { - private val MINIKUBE_EXECUTABLE_LOCATION = "/usr/local/bin/minikube" private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 def getMinikubeIp: String = { @@ -60,15 +59,8 @@ private[spark] object Minikube extends Logging { } private def executeMinikube(action: String, args: 
String*): Seq[String] = { - val minikubeBinary = new File(MINIKUBE_EXECUTABLE_LOCATION) - require( - minikubeBinary.isFile, - s"Minikube binary was not found at $MINIKUBE_EXECUTABLE_LOCATION.") - require( - minikubeBinary.canExecute, - s"Minikube binary at $MINIKUBE_EXECUTABLE_LOCATION is not executable.") ProcessUtils.executeProcess( - Array("/usr/local/bin/minikube", action) ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) + Array("bash", "-c", s"minikube $action") ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) } }
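A rough sketch of the effect of this last change, not part of the patch itself: with the hard-coded /usr/local/bin/minikube checks removed, each Minikube action is now spawned through a shell, so the binary only has to be resolvable on that shell's PATH. For an action such as `ip`, the command issued via ProcessUtils.executeProcess (with the 60-second timeout above) is effectively:

    # Effectively what the backend runs for an action such as "ip";
    # succeeds wherever `minikube` resolves on the spawned shell's PATH.
    bash -c "minikube ip"

    # A quick pre-flight check that the binary will be found:
    command -v minikube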