Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions assemblies/plugins/engines/beam/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@
<version>2.10.1</version>
</dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>woodstox-core-asl</artifactId>
<version>4.4.1</version>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>5.0.3</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
Expand Down
10 changes: 6 additions & 4 deletions assemblies/plugins/engines/beam/src/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,6 @@
<include>io.dropwizard.metrics:metrics-json</include>
<include>io.dropwizard.metrics:metrics-jvm</include>
<include>io.github.classgraph:classgraph</include>


<include>io.grpc:grpc-alts</include>
<include>io.grpc:grpc-api</include>
<include>io.grpc:grpc-auth</include>
Expand Down Expand Up @@ -195,7 +193,6 @@
<include>javax.annotation:javax.annotation-api</include>
<include>javax.servlet:javax.servlet-api</include>
<include>javax.xml.bind:jaxb-api</include>
<include>javax.xml.stream:stax-api</include>
<include>joda-time:joda-time</include>
<include>net.razorvine:pyrolite</include>
<include>net.sf.py4j:py4j</include>
Expand Down Expand Up @@ -279,14 +276,19 @@
<include>org.codehaus.jackson:jackson-mapper-asl</include>
<include>org.codehaus.jackson:jackson-xc</include>
<include>org.codehaus.mojo:animal-sniffer-annotations</include>
<include>org.codehaus.woodstox:woodstox-core-asl</include>
<include>com.fasterxml.woodstox:woodstox-core</include>
<include>org.conscrypt:conscrypt-openjdk-uber</include>
<include>org.fusesource.leveldbjni:leveldbjni-all</include>
<include>org.glassfish.hk2.external:aopalliance-repackaged</include>
<include>org.glassfish.hk2:hk2-api</include>
<include>org.glassfish.hk2:hk2-locator</include>
<include>org.glassfish.hk2:hk2-utils</include>
<include>org.glassfish.hk2:osgi-resource-locator</include>
<include>org.glassfish.jersey.containers:jersey-container-servlet</include>
<include>org.glassfish.jersey.containers:jersey-container-servlet-core</include>
<include>org.glassfish.jersey.core:jersey-client</include>
<include>org.glassfish.jersey.core:jersey-common</include>
<include>org.glassfish.jersey.core:jersey-server</include>
<include>org.glassfish.jersey.media:jersey-media-jaxb</include>
<include>org.lz4:lz4-java</include>
<include>org.mortbay.jetty:jetty-util</include>
Expand Down
1 change: 0 additions & 1 deletion assemblies/plugins/tech/parquet/src/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
<include>io.netty:netty:jar</include>
<include>jakarta.activation:jakarta.activation:jar</include>
<include>javax.xml.bind:jaxb-api:jar</include>
<include>javax.xml.stream:stax-api:jar</include>
<include>org.apache.curator:curator-client:jar</include>
<include>org.apache.curator:curator-framework:jar</include>
<include>org.apache.curator:curator-recipes:jar</include>
Expand Down
8 changes: 4 additions & 4 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
<jsch.version>0.1.54</jsch.version>
<jzlib.version>1.0.7</jzlib.version>
<ognl.version>2.6.9</ognl.version>
<woodstox-core-asl.version>4.4.1</woodstox-core-asl.version>
<woodstox-core.version>5.0.3</woodstox-core.version>
<xmlunit.version>1.5</xmlunit.version>
<javassist.version>3.28.0-GA</javassist.version>
<json-simple.version>1.1.1</json-simple.version>
Expand Down Expand Up @@ -476,9 +476,9 @@

<!-- Test dependencies -->
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>woodstox-core-asl</artifactId>
<version>${woodstox-core-asl.version}</version>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>${woodstox-core.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
Expand Down
113 changes: 113 additions & 0 deletions docker/integration-tests/Dockerfile.unit-tests-spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Pin the base image to a fixed release so rebuilds are reproducible. An
# untagged image resolves to ubuntu:latest, which moves between releases;
# newer Ubuntu images reportedly ship a default user with UID/GID 1000, which
# would clash with the default JENKINS_UID/JENKINS_GID declared below.
FROM ubuntu:22.04
# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="Apache Hop"

# Branch name build argument (per the original note: used to download the
# correct version). It is also exported into the image environment.
ARG BRANCH_NAME
ENV BRANCH_NAME=$BRANCH_NAME
# Path where the artefacts are deployed
ENV DEPLOYMENT_PATH=/opt
# Volume mount point
ENV VOLUME_MOUNT_POINT=/files
# Jenkins user and group
ARG JENKINS_USER=hop
ARG JENKINS_GROUP=hop
ARG JENKINS_UID=1000
ARG JENKINS_GID=1000
# NOTE(review): GCP_KEY_FILE is not referenced by any instruction in this
# Dockerfile; it is kept so existing --build-arg callers keep working.
ARG GCP_KEY_FILE=
# Keep apt non-interactive during the build only. Declared as ARG (not ENV) so
# the setting is visible to RUN steps but is not baked into the runtime
# environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Any JRE settings you want to pass on.
# "-XX:+AggressiveHeap" tells the JVM to use all memory assigned to the
# container, which removes the need to calculate the necessary heap Xmx.
ENV HOP_OPTIONS=-XX:+AggressiveHeap

# Install required packages, then create the volume mount point and the
# unprivileged build user that owns the deployment and mount directories.
# procps: provides ps, top, vmstat, w, kill, free, slabtop, and skill.
# wget:   listed explicitly because a later build step downloads jars with it;
#         previously it was only available if another package pulled it in.
# The apt package lists are removed in the same layer so they do not end up in
# the image.
RUN apt-get update \
    && apt-get install --assume-yes \
        bash \
        curl \
        git \
        locales \
        openjdk-11-jre-headless \
        procps \
        python3-pip \
        ttf-mscorefonts-installer \
        unzip \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p "${VOLUME_MOUNT_POINT}" \
    && groupadd --gid "${JENKINS_GID}" "${JENKINS_GROUP}" \
    && useradd -m -d "/home/${JENKINS_USER}" -u "${JENKINS_UID}" -g "${JENKINS_GROUP}" "${JENKINS_USER}" \
    && chown "${JENKINS_USER}:${JENKINS_GROUP}" "${DEPLOYMENT_PATH}" \
    && chown "${JENKINS_USER}:${JENKINS_GROUP}" "${VOLUME_MOUNT_POINT}"

# Uncomment the en_US.UTF-8 entry in /etc/locale.gen, generate the locale, and
# make it the default for everything that runs in the image.
RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
    && locale-gen
# key=value form: the space-separated "ENV key value" syntax is deprecated.
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8

# Install parquet-tools from PyPI. --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN pip3 install --no-cache-dir parquet-tools

# Place the locally built Hop client archive (whatever matches hop-* in the
# client assembly target folder) into the deployment directory as hop.zip,
# owned by the build user.
COPY --chown=${JENKINS_USER}:${JENKINS_GROUP} \
    ./assemblies/client/target/hop-* \
    ${DEPLOYMENT_PATH}/hop.zip

# Unpack the Hop archive into the deployment directory and delete the zip, then:
#   1. remove the bundled jars that must not be present for Spark,
#   2. download the replacement/extra jars into the Beam engine lib folder,
#   3. hand the installation to the build user and restrict the shell scripts,
#   4. generate the fat jar from inside the Hop folder.
# "set -e" aborts the whole step on the first failing command, including a
# failed download inside the loop.
RUN set -e; \
    beam_lib="${DEPLOYMENT_PATH}/hop/plugins/engines/beam/lib"; \
    unzip ${DEPLOYMENT_PATH}/hop.zip -d ${DEPLOYMENT_PATH}; \
    rm ${DEPLOYMENT_PATH}/hop.zip; \
    rm "${beam_lib}"/flink-shaded-jackson*; \
    rm "${beam_lib}"/jackson-module-scala*; \
    rm "${beam_lib}"/scala-java8-compat*; \
    rm "${beam_lib}"/scala-library*; \
    rm "${beam_lib}"/scala-parser-combinators*; \
    for jar_url in \
        https://repo1.maven.org/maven2/com/fasterxml/jackson/module/jackson-module-scala_2.12/2.13.3/jackson-module-scala_2.12-2.13.3.jar \
        https://repo1.maven.org/maven2/org/json4s/json4s-ast_2.12/3.7.0-M5/json4s-ast_2.12-3.7.0-M5.jar \
        https://repo1.maven.org/maven2/org/json4s/json4s-core_2.12/3.7.0-M5/json4s-core_2.12-3.7.0-M5.jar \
        https://repo1.maven.org/maven2/org/json4s/json4s-jackson_2.12/3.7.0-M5/json4s-jackson_2.12-3.7.0-M5.jar \
        https://repo1.maven.org/maven2/org/json4s/json4s-scalap_2.12/3.7.0-M5/json4s-scalap_2.12-3.7.0-M5.jar \
        https://repo1.maven.org/maven2/log4j/log4j/1.2.17/log4j-1.2.17.jar \
        https://repo1.maven.org/maven2/org/scala-lang/scala-compiler/2.12.10/scala-compiler-2.12.10.jar \
        https://repo1.maven.org/maven2/org/scala-lang/scala-library/2.12.10/scala-library-2.12.10.jar \
        https://repo1.maven.org/maven2/org/scala-lang/modules/scala-parser-combinators_2.12/1.1.2/scala-parser-combinators_2.12-1.1.2.jar \
        https://repo1.maven.org/maven2/org/scala-lang/scala-reflect/2.12.10/scala-reflect-2.12.10.jar \
        https://repo1.maven.org/maven2/org/scala-lang/modules/scala-xml_2.12/1.2.0/scala-xml_2.12-1.2.0.jar \
        https://repo1.maven.org/maven2/org/apache/spark/spark-unsafe_2.12/3.1.3/spark-unsafe_2.12-3.1.3.jar \
        https://repo1.maven.org/maven2/org/apache/xbean/xbean-asm7-shaded/4.15/xbean-asm7-shaded-4.15.jar \
    ; do \
        wget -P "${beam_lib}/" "${jar_url}"; \
    done; \
    chown -R ${JENKINS_USER}:${JENKINS_GROUP} ${DEPLOYMENT_PATH}/hop; \
    chmod 700 ${DEPLOYMENT_PATH}/hop/*.sh; \
    cd ${DEPLOYMENT_PATH}/hop; \
    ./hop-conf.sh --generate-fat-jar=/tmp/hop-fatjar.jar

# Declare the mount point as a volume so Hop pipeline and workflow files can be
# supplied from outside the container.
VOLUME ["/files"]

# Run as the unprivileged build user, starting in its home directory.
USER ${JENKINS_USER}
WORKDIR /home/${JENKINS_USER}

# Put the Hop installation on the PATH and point the Google client libraries at
# the expected credentials file location.
ENV PATH="${PATH}:${DEPLOYMENT_PATH}/hop" \
    GOOGLE_APPLICATION_CREDENTIALS="/tmp/google-key-apache-hop-it.json"

# No default command: the entrypoint is cleared, so the caller supplies it.
# CMD ["/bin/bash"]
ENTRYPOINT []
34 changes: 34 additions & 0 deletions docker/integration-tests/integration-tests-base-spark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

version: '2.4'

services:
  # Test-runner service. The image is built from the repository root using the
  # Spark unit-test Dockerfile; the integration-tests folder is mounted at
  # /files and the test script inside it is run for ${PROJECT_NAME}.
  integration_test:
    build:
      context: ../../.
      dockerfile: docker/integration-tests/Dockerfile.unit-tests-spark
      args:
        JENKINS_USER: jenkins
        JENKINS_UID: "1000"
        JENKINS_GROUP: jenkins
        JENKINS_GID: "1000"
        GCP_KEY_FILE: ./test
    volumes:
      - ../../integration-tests/:/files
    environment:
      FLASK_ENV: docker
    command:
      - "bash"
      - "-c"
      - "/files/scripts/run-tests.sh ${PROJECT_NAME}"
43 changes: 43 additions & 0 deletions docker/integration-tests/integration-tests-spark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

version: '2.4'

services:
  # Test runner: reuses the integration_test service from the base file and
  # is wired to the Spark master and worker defined below.
  integration_test_spark:
    extends:
      file: integration-tests-base-spark.yaml
      service: integration_test
    depends_on: [spark]
    links: [spark, spark-worker]

  # Spark master, built from Dockerfile.master in the spark folder.
  spark:
    build:
      context: ../../docker/integration-tests/spark/.
      dockerfile: Dockerfile.master
    environment:
      INIT_DAEMON_STEP: setup_spark

  # Spark worker, built from Dockerfile.worker; started after the master and
  # pointed at it through SPARK_MASTER.
  spark-worker:
    build:
      context: ../../docker/integration-tests/spark/.
      dockerfile: Dockerfile.worker
    depends_on: [spark]
    environment:
      SPARK_MASTER: "spark://spark:7077"
- "SPARK_MASTER=spark://spark:7077"
49 changes: 49 additions & 0 deletions docker/integration-tests/spark/Dockerfile.master
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM alpine:3.10

# All ENV instructions use the key=value form; the space-separated
# "ENV key value" syntax is deprecated.

# Init-daemon settings (presumably read by the step scripts copied below).
ENV ENABLE_INIT_DAEMON=false \
    INIT_DAEMON_BASE_URI=http://identifier/init-daemon \
    INIT_DAEMON_STEP=spark_master_init

# Spark distribution that is downloaded during the build.
ENV BASE_URL=https://archive.apache.org/dist/spark/ \
    SPARK_VERSION=3.1.3 \
    HADOOP_VERSION=3.2

# Spark master settings.
ENV SPARK_MASTER_PORT=7077 \
    SPARK_MASTER_WEBUI_PORT=8080 \
    SPARK_MASTER_LOG=/spark/logs

# Copy the init-daemon step helpers and the master start script to the image
# root in a single instruction.
COPY ./scripts/wait-for-step.sh \
     ./scripts/execute-step.sh \
     ./scripts/finish-step.sh \
     ./scripts/master.sh \
     /

# Install the runtime packages, mark the copied scripts executable, and unpack
# the Spark distribution to /spark.
# - The scripts are chmod'ed once, by absolute path, in this layer; the former
#   second RUN repeated the same chmod and only added an extra layer.
# - ${BASE_URL%/} strips the trailing slash from BASE_URL so the download URL
#   does not contain a double slash.
# - The archive is downloaded to /tmp and removed in the same layer, so it
#   never ends up in the image.
# - tar runs without -v to keep the build log readable.
RUN apk add --no-cache \
        bash \
        coreutils \
        curl \
        libc6-compat \
        nss \
        openjdk11-jre \
        procps \
    && ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2 \
    && chmod +x /wait-for-step.sh /execute-step.sh /finish-step.sh /master.sh \
    && spark_dist="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" \
    && wget -O "/tmp/${spark_dist}.tgz" "${BASE_URL%/}/spark-${SPARK_VERSION}/${spark_dist}.tgz" \
    && tar -xzf "/tmp/${spark_dist}.tgz" -C / \
    && mv "/${spark_dist}" /spark \
    && rm "/tmp/${spark_dist}.tgz"

# Ports documented for this image: 8080 and 7077 match SPARK_MASTER_WEBUI_PORT
# and SPARK_MASTER_PORT above.
# NOTE(review): 6066 is presumably the REST submission port - confirm.
EXPOSE 8080
EXPOSE 7077
EXPOSE 6066

# Start the master through its launch script.
CMD ["/bin/bash", "/master.sh"]
50 changes: 50 additions & 0 deletions docker/integration-tests/spark/Dockerfile.worker
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

FROM alpine:3.10

# All ENV instructions use the key=value form; the space-separated
# "ENV key value" syntax is deprecated.

# Init-daemon settings (presumably read by the step scripts copied below).
# NOTE(review): INIT_DAEMON_STEP is "spark_master_init" in this worker image,
# the same value as in the master image - confirm this is intended.
ENV ENABLE_INIT_DAEMON=false \
    INIT_DAEMON_BASE_URI=http://identifier/init-daemon \
    INIT_DAEMON_STEP=spark_master_init

# Spark distribution that is downloaded during the build.
ENV BASE_URL=https://archive.apache.org/dist/spark/ \
    SPARK_VERSION=3.1.3 \
    HADOOP_VERSION=3.2

# Spark worker settings.
ENV SPARK_WORKER_WEBUI_PORT=8081 \
    SPARK_WORKER_LOG=/spark/logs \
    SPARK_MASTER="spark://spark:7077"

# Copy the init-daemon step helpers and the worker start script to the image
# root in a single instruction.
COPY ./scripts/wait-for-step.sh \
     ./scripts/execute-step.sh \
     ./scripts/finish-step.sh \
     ./scripts/worker.sh \
     /

# Install the runtime packages, mark the copied scripts executable, and unpack
# the Spark distribution to /spark.
# - The scripts are chmod'ed once, by absolute path, in this layer; the former
#   second RUN repeated the same chmod and only added an extra layer.
# - ${BASE_URL%/} strips the trailing slash from BASE_URL so the download URL
#   does not contain a double slash.
# - The archive is downloaded to /tmp and removed in the same layer, so it
#   never ends up in the image.
# - tar runs without -v to keep the build log readable.
RUN apk add --no-cache \
        bash \
        coreutils \
        curl \
        libc6-compat \
        nss \
        openjdk11-jre \
        procps \
    && ln -s /lib64/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2 \
    && chmod +x /wait-for-step.sh /execute-step.sh /finish-step.sh /worker.sh \
    && spark_dist="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" \
    && wget -O "/tmp/${spark_dist}.tgz" "${BASE_URL%/}/spark-${SPARK_VERSION}/${spark_dist}.tgz" \
    && tar -xzf "/tmp/${spark_dist}.tgz" -C / \
    && mv "/${spark_dist}" /spark \
    && rm "/tmp/${spark_dist}.tgz"

# Port documented for this image; it matches SPARK_WORKER_WEBUI_PORT above.
EXPOSE 8081

# Start the worker through its launch script.
CMD ["/bin/bash", "/worker.sh"]
Loading