Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docker/hoodie/hadoop/base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ MAINTAINER Hoodie
USER root

# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
ENV LANG=C.UTF-8

ARG HADOOP_VERSION=3.3.4
ARG HADOOP_VERSION=3.3.4
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_URL ${HADOOP_URL}
ENV HADOOP_VERSION=${HADOOP_VERSION}
ENV HADOOP_URL=${HADOOP_URL}

RUN set -x \
&& DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq install curl wget netcat procps \
Expand All @@ -46,7 +46,7 @@ ENV MULTIHOMED_NETWORK=1
ENV HADOOP_HOME=${HADOOP_PREFIX}
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV USER=root
ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
ENV PATH=/usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH

# Exposing a union of ports across hadoop versions
# Well known ports including ssh
Expand Down
10 changes: 5 additions & 5 deletions docker/hoodie/hadoop/base_java11/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ LABEL maintainer="Hoodie"
USER root

# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
ENV LANG=C.UTF-8

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_VERSION=2.8.4
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_URL ${HADOOP_URL}
ENV HADOOP_VERSION=${HADOOP_VERSION}
ENV HADOOP_URL=${HADOOP_URL}

RUN set -x \
&& DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq install curl wget netcat procps \
Expand All @@ -45,7 +45,7 @@ ENV MULTIHOMED_NETWORK=1
ENV HADOOP_HOME=${HADOOP_PREFIX}
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV USER=root
ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
ENV PATH=/usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH

# Exposing a union of ports across hadoop versions
# Well known ports including ssh
Expand Down
2 changes: 1 addition & 1 deletion docker/hoodie/hadoop/base_java17/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ LABEL maintainer="Hoodie"
USER root

# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
ENV LANG=C.UTF-8

ARG HADOOP_VERSION=3.4.0
ENV HADOOP_VERSION=${HADOOP_VERSION}
Expand Down
3 changes: 2 additions & 1 deletion docker/hoodie/hadoop/datanode/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ ARG HADOOP_VERSION=3.3.4
ARG HADOOP_DN_PORT=50075
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest

ENV HADOOP_DN_PORT ${HADOOP_DN_PORT}
ARG HADOOP_DN_PORT
ENV HADOOP_DN_PORT=${HADOOP_DN_PORT}

ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
RUN mkdir -p /hadoop/dfs/data
Expand Down
3 changes: 2 additions & 1 deletion docker/hoodie/hadoop/historyserver/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ RUN wget https://repo1.maven.org/maven2/org/openlabtesting/leveldbjni/leveldbjni
ENV LD_LIBRARY_PATH="/usr/lib"
ENV JAVA_LIBRARY_PATH="/usr/lib"

ENV HADOOP_HISTORY_PORT ${HADOOP_HISTORY_PORT}
ARG HADOOP_HISTORY_PORT
ENV HADOOP_HISTORY_PORT=${HADOOP_HISTORY_PORT}

ENV YARN_CONF_yarn_timeline___service_leveldb___timeline___store_path=/hadoop/yarn/timeline
RUN mkdir -p /hadoop/yarn/timeline
Expand Down
14 changes: 7 additions & 7 deletions docker/hoodie/hadoop/hive_base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
ARG HADOOP_VERSION=3.3.4
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest

ENV HIVE_HOME /opt/hive
ENV PATH $HIVE_HOME/bin:$PATH
ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION
ENV HIVE_HOME=/opt/hive
ENV PATH=$HIVE_HOME/bin:$PATH
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION

WORKDIR /opt

ARG HIVE_VERSION=3.1.3
ARG HIVE_URL=https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
ENV HIVE_VERSION ${HIVE_VERSION}
ENV HIVE_URL ${HIVE_URL}
ENV HIVE_VERSION=${HIVE_VERSION}
ENV HIVE_URL=${HIVE_URL}

#Install Hive MySQL, PostgreSQL JDBC
RUN echo "Hive URL is :${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \
Expand Down Expand Up @@ -61,9 +61,9 @@ RUN chmod +x /usr/local/bin/startup.sh
COPY entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/entrypoint.sh

ENV PATH $HIVE_HOME/bin/:$PATH
ENV PATH=$HIVE_HOME/bin/:$PATH
# NOTE: This is the only battle-proven method to inject jars into Hive CLI
ENV AUX_CLASSPATH=file://${HUDI_HADOOP_BUNDLE}

ENTRYPOINT ["entrypoint.sh"]
CMD startup.sh
CMD ["startup.sh"]
3 changes: 2 additions & 1 deletion docker/hoodie/hadoop/namenode/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ ARG HADOOP_VERSION=3.3.4
ARG HADOOP_WEBHDFS_PORT=50070
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest

ENV HADOOP_WEBHDFS_PORT ${HADOOP_WEBHDFS_PORT}
ARG HADOOP_WEBHDFS_PORT
ENV HADOOP_WEBHDFS_PORT=${HADOOP_WEBHDFS_PORT}

ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
RUN mkdir -p /hadoop/dfs/name
Expand Down
20 changes: 10 additions & 10 deletions docker/hoodie/hadoop/prestobase/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest as hadoop-base

ARG PRESTO_VERSION=0.271

ENV PRESTO_VERSION ${PRESTO_VERSION}
ENV PRESTO_HOME /opt/presto-server-${PRESTO_VERSION}
ENV PRESTO_CONF_DIR ${PRESTO_HOME}/etc
ENV PRESTO_LOG_DIR /var/log/presto
ENV PRESTO_JVM_MAX_HEAP 2G
ENV PRESTO_QUERY_MAX_MEMORY 1GB
ENV PRESTO_QUERY_MAX_MEMORY_PER_NODE 512MB
ENV PRESTO_DISCOVERY_URI http://presto-coordinator-1:8090
ENV PATH $PATH:${PRESTO_HOME}/bin
ENV PRESTO_VERSION=${PRESTO_VERSION}
ENV PRESTO_HOME=/opt/presto-server-${PRESTO_VERSION}
ENV PRESTO_CONF_DIR=${PRESTO_HOME}/etc
ENV PRESTO_LOG_DIR=/var/log/presto
ENV PRESTO_JVM_MAX_HEAP=2G
ENV PRESTO_QUERY_MAX_MEMORY=1GB
ENV PRESTO_QUERY_MAX_MEMORY_PER_NODE=512MB
ENV PRESTO_DISCOVERY_URI=http://presto-coordinator-1:8090
ENV PATH=$PATH:${PRESTO_HOME}/bin

RUN set -x \
&& DEBIAN_FRONTEND=noninteractive apt-get -yq update \
Expand Down Expand Up @@ -77,7 +77,7 @@ COPY lib/* /usr/local/lib/
RUN chmod +x /usr/local/bin/entrypoint.sh

ADD target/ /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/
ENV HUDI_PRESTO_BUNDLE /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/hudi-presto-bundle.jar
ENV HUDI_PRESTO_BUNDLE=/var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/hudi-presto-bundle.jar
RUN cp ${HUDI_PRESTO_BUNDLE} ${PRESTO_HOME}/plugin/hive-hadoop2/
# TODO: the latest master of Presto relies on hudi-presto-bundle, while current Presto releases
# rely on hudi-common and hudi-hadoop-mr 0.9.0, which are pulled in plugin/hive-hadoop2/ in the
Expand Down
26 changes: 13 additions & 13 deletions docker/hoodie/hadoop/spark_base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ ARG HADOOP_VERSION=3.3.4
ARG HIVE_VERSION=3.1.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}

ENV ENABLE_INIT_DAEMON true
ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
ENV INIT_DAEMON_STEP spark_master_init
ENV ENABLE_INIT_DAEMON=true
ENV INIT_DAEMON_BASE_URI=http://identifier/init-daemon
ENV INIT_DAEMON_STEP=spark_master_init

ARG SPARK_VERSION=3.5.3
ARG SPARK_HADOOP_VERSION=3

ENV SPARK_VERSION ${SPARK_VERSION}
ENV HADOOP_VERSION ${SPARK_HADOOP_VERSION}
ENV SPARK_VERSION=${SPARK_VERSION}
ENV HADOOP_VERSION=${SPARK_HADOOP_VERSION}

COPY wait-for-step.sh /
COPY execute-step.sh /
Expand All @@ -52,16 +52,16 @@ RUN chmod +x /wait-for-step.sh && chmod +x /execute-step.sh && chmod +x /finish-

# Fix the value of PYTHONHASHSEED
# Note: this is needed when you use Python 3.3 or greater
ENV PYTHONHASHSEED 1
ENV PYTHONHASHSEED=1

ENV SPARK_HOME /opt/spark
ENV SPARK_INSTALL ${SPARK_HOME}
ENV SPARK_CONF_DIR ${SPARK_HOME}/conf
ENV PATH $SPARK_INSTALL/bin:$PATH
ENV SPARK_HOME=/opt/spark
ENV SPARK_INSTALL=${SPARK_HOME}
ENV SPARK_CONF_DIR=${SPARK_HOME}/conf
ENV PATH=$SPARK_INSTALL/bin:$PATH

ENV SPARK_DRIVER_PORT 5001
ENV SPARK_UI_PORT 5002
ENV SPARK_BLOCKMGR_PORT 5003
ENV SPARK_DRIVER_PORT=5001
ENV SPARK_UI_PORT=5002
ENV SPARK_BLOCKMGR_PORT=5003

EXPOSE $SPARK_DRIVER_PORT $SPARK_UI_PORT $SPARK_BLOCKMGR_PORT

Expand Down
10 changes: 5 additions & 5 deletions docker/hoodie/hadoop/sparkadhoc/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ ARG PRESTO_VERSION=0.268
ARG TRINO_VERSION=368
COPY adhoc.sh /opt/spark

ENV SPARK_WORKER_WEBUI_PORT 8081
ENV SPARK_WORKER_LOG /spark/logs
ENV SPARK_MASTER "spark://spark-master:7077"
ENV PRESTO_VERSION ${PRESTO_VERSION}
ENV TRINO_VERSION ${TRINO_VERSION}
ENV SPARK_WORKER_WEBUI_PORT=8081
ENV SPARK_WORKER_LOG=/spark/logs
ENV SPARK_MASTER="spark://spark-master:7077"
ENV PRESTO_VERSION=${PRESTO_VERSION}
ENV TRINO_VERSION=${TRINO_VERSION}
ENV BASE_URL=https://repo1.maven.org/maven2

ENV SPARK_BUNDLE_JAR=/var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar
Expand Down
6 changes: 3 additions & 3 deletions docker/hoodie/hadoop/sparkmaster/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${S

COPY master.sh /opt/spark

ENV SPARK_MASTER_PORT 7077
ENV SPARK_MASTER_WEBUI_PORT 8080
ENV SPARK_MASTER_LOG /opt/spark/logs
ENV SPARK_MASTER_PORT=7077
ENV SPARK_MASTER_WEBUI_PORT=8080
ENV SPARK_MASTER_LOG=/opt/spark/logs

EXPOSE 8080 7077 6066

Expand Down
6 changes: 3 additions & 3 deletions docker/hoodie/hadoop/sparkworker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${S

COPY worker.sh /opt/spark

ENV SPARK_WORKER_WEBUI_PORT 8081
ENV SPARK_WORKER_LOG /spark/logs
ENV SPARK_MASTER "spark://spark-master:7077"
ENV SPARK_WORKER_WEBUI_PORT=8081
ENV SPARK_WORKER_LOG=/spark/logs
ENV SPARK_MASTER="spark://spark-master:7077"

EXPOSE 8081

Expand Down
8 changes: 4 additions & 4 deletions docker/hoodie/hadoop/trinobase/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ RUN apt-get install -y \
uuid-runtime \
less

ENV JAVA_HOME /usr/java/default
ENV PATH $PATH:$JAVA_HOME/bin
ENV JAVA_HOME=/usr/java/default
ENV PATH=$PATH:$JAVA_HOME/bin

WORKDIR /usr/local/bin
RUN wget -q ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar
Expand All @@ -53,10 +53,10 @@ RUN wget -q ${BASE_URL}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TR
RUN tar xvzf trino-server-${TRINO_VERSION}.tar.gz -C /usr/local/
RUN ln -s /usr/local/trino-server-${TRINO_VERSION} $TRINO_HOME

ENV TRINO_BASE_WS /var/hoodie/ws/docker/hoodie/hadoop/trinobase
ENV TRINO_BASE_WS=/var/hoodie/ws/docker/hoodie/hadoop/trinobase
RUN mkdir -p ${TRINO_BASE_WS}/target/
ADD target/ ${TRINO_BASE_WS}/target/
ENV HUDI_TRINO_BUNDLE ${TRINO_BASE_WS}/target/hudi-trino-bundle.jar
ENV HUDI_TRINO_BUNDLE=${TRINO_BASE_WS}/target/hudi-trino-bundle.jar
RUN cp ${HUDI_TRINO_BUNDLE} ${TRINO_HOME}/plugin/hive/

ADD scripts ${TRINO_HOME}/scripts
Expand Down
Loading