From cb42b7994cb3532c8666838c180c8a6b988b52ba Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Fri, 3 May 2024 20:26:58 +0800 Subject: [PATCH 01/10] refactor(dev/docker/hive): shrink hive Docker image size by 200MB --- dev/docker/hive/Dockerfile | 47 +++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 41c0e90180..031699c586 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -3,14 +3,31 @@ # This software is licensed under the Apache License version 2. # -FROM ubuntu:16.04 -LABEL maintainer="support@datastrato.com" +FROM ubuntu:16.04 AS packages ARG HADOOP_PACKAGE_NAME ARG HIVE_PACKAGE_NAME ARG JDBC_DIVER_PACKAGE_NAME ARG DEBIAN_FRONTEND=noninteractive +COPY packages /tmp/packages + +ENV JAVA_HOME=/usr/local/jdk +ENV HIVE_HOME=/usr/local/hive +ENV HADOOP_HOME=/usr/local/hadoop + +# hadoop +RUN mkdir ${HADOOP_HOME} +RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} +# hive +RUN mkdir ${HIVE_HOME} +RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} +# add mysql jdbc driver +RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} + +FROM ubuntu:16.04 +LABEL maintainer="support@datastrato.com" + WORKDIR / ################################################################################ @@ -45,7 +62,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \ RUN mkdir /root/.ssh RUN cat /dev/zero | ssh-keygen -q -N "" > /dev/null && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys -COPY packages /tmp/packages +#COPY packages /tmp/packages ################################################################################ # set environment variables @@ -90,8 +107,12 @@ RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment ################################################################################ # install hadoop -RUN mkdir ${HADOOP_HOME} -RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} +#RUN mkdir ${HADOOP_HOME} +COPY --from=packages /tmp/packages/${HADOOP_PACKAGE_NAME}/* ${HADOOP_HOME}/ +#RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} +#ADD packages/${HADOOP_PACKAGE_NAME} ${HADOOP_HOME}/ + +RUN ls -la ${HADOOP_HOME} # replace configuration templates RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml @@ -110,8 +131,10 @@ RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive ################################################################################ # install hive -RUN mkdir ${HIVE_HOME} -RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} +#RUN mkdir ${HIVE_HOME} +COPY --from=packages /tmp/packages/${HIVE_PACKAGE_NAME}/* ${HIVE_HOME}/ +#RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} +#ADD packages/${HIVE_PACKAGE_NAME} ${HIVE_HOME}/ ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ################################################################################ @@ -119,7 +142,7 @@ ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ENV MYSQL_PWD=ds123 RUN echo "mysql-server mysql-server/root_password password ${MYSQL_PWD}" | debconf-set-selections RUN echo "mysql-server mysql-server/root_password_again password ${MYSQL_PWD}" | debconf-set-selections -RUN apt-get install -y mysql-server +RUN apt-get install -y mysql-server && rm -rf /var/lib/apt/lists/* RUN chown -R mysql:mysql /var/lib/mysql RUN usermod -d /var/lib/mysql/ mysql @@ -127,7 +150,9 @@ RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/ ################################################################################ # add mysql jdbc driver -RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} +COPY --from=packages /tmp/packages/${JDBC_DIVER_PACKAGE_NAME}/* ${HIVE_HOME}/lib/ +#RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} +# ADD packages/${JDBC_DIVER_PACKAGE_NAME} ${HIVE_HOME}/lib/ ################################################################################ # add users and groups @@ -146,8 +171,8 @@ RUN chown -R datastrato:hadoop /home/datastrato ################################################################################ # removed install packages and cache -RUN rm -rf /tmp/packages -RUN rm -rf /var/lib/apt/lists/* +#RUN rm -rf /tmp/packages +#RUN rm -rf /var/lib/apt/lists/* ################################################################################ # expose port From 3c74f90b258c14893b7c895544d21f308ce75d3b Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 5 May 2024 11:15:53 +0800 Subject: [PATCH 02/10] feat(hive): reduce hive container size from 2.27GB to 1.76GB --- dev/docker/hive/Dockerfile | 14 +++++++++----- dev/docker/hive/start.sh | 2 ++ .../integration/test/container/ContainerSuite.java | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 031699c586..a2e722c40b 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -19,9 +19,13 @@ ENV HADOOP_HOME=/usr/local/hadoop # hadoop RUN mkdir ${HADOOP_HOME} RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} + +RUN ls -la ${HADOOP_HOME} # hive RUN mkdir ${HIVE_HOME} RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} + +RUN ls -la ${HIVE_HOME}/bin # add mysql jdbc driver RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} @@ -62,8 +66,6 @@ RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \ RUN mkdir /root/.ssh RUN cat /dev/zero | ssh-keygen -q -N "" > /dev/null && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys -#COPY packages /tmp/packages - ################################################################################ # set environment variables ENV JAVA_HOME=/usr/local/jdk @@ -108,7 +110,7 @@ RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment ################################################################################ # install hadoop #RUN mkdir ${HADOOP_HOME} -COPY --from=packages /tmp/packages/${HADOOP_PACKAGE_NAME}/* ${HADOOP_HOME}/ +COPY --from=packages ${HADOOP_HOME} ${HADOOP_HOME} #RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} #ADD packages/${HADOOP_PACKAGE_NAME} ${HADOOP_HOME}/ @@ -132,7 +134,8 @@ RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive ################################################################################ # install hive #RUN mkdir ${HIVE_HOME} -COPY --from=packages /tmp/packages/${HIVE_PACKAGE_NAME}/* ${HIVE_HOME}/ +COPY --from=packages ${HIVE_HOME} ${HIVE_HOME} +RUN ls -la ${HIVE_HOME}/bin #RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} #ADD packages/${HIVE_PACKAGE_NAME} ${HIVE_HOME}/ ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml @@ -150,7 +153,8 @@ RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/ ################################################################################ # add mysql jdbc driver -COPY --from=packages /tmp/packages/${JDBC_DIVER_PACKAGE_NAME}/* ${HIVE_HOME}/lib/ +# FIXME: already copied at installation of HIVE_HOME ? +#COPY --from=packages ${HIVE_HOME}/lib/* ${HIVE_HOME}/lib/ #RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} # ADD packages/${JDBC_DIVER_PACKAGE_NAME} ${HIVE_HOME}/lib/ diff --git a/dev/docker/hive/start.sh b/dev/docker/hive/start.sh index e12d52ab3d..611cb49c75 100644 --- a/dev/docker/hive/start.sh +++ b/dev/docker/hive/start.sh @@ -34,5 +34,7 @@ ${HIVE_HOME}/bin/schematool -initSchema -dbType mysql ${HIVE_HOME}/bin/hive --service hiveserver2 > /dev/null 2>&1 & ${HIVE_HOME}/bin/hive --service metastore > /dev/null 2>&1 & +echo "Hive started successfully." + # persist the container tail -f /dev/null diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java index 22b10306ab..f0ee590b93 100644 --- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java @@ -88,6 +88,7 @@ public void startHiveContainer() { HiveContainer.Builder hiveBuilder = HiveContainer.builder() .withHostName("gravitino-ci-hive") + .withImage("unknowntpo/gravitino-ci-hive:shrink") .withEnvVars( ImmutableMap.builder() .put("HADOOP_USER_NAME", "datastrato") From 87230d6c21565194a3f64be3627696bf790a345e Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 5 May 2024 11:19:13 +0800 Subject: [PATCH 03/10] feat(hive): clean up --- dev/docker/hive/Dockerfile | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index a2e722c40b..917cd7e90b 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -109,12 +109,7 @@ RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment ################################################################################ # install hadoop -#RUN mkdir ${HADOOP_HOME} COPY --from=packages ${HADOOP_HOME} ${HADOOP_HOME} -#RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} -#ADD packages/${HADOOP_PACKAGE_NAME} ${HADOOP_HOME}/ - -RUN ls -la ${HADOOP_HOME} # replace configuration templates RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml @@ -133,11 +128,7 @@ RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive ################################################################################ # install hive -#RUN mkdir ${HIVE_HOME} COPY --from=packages ${HIVE_HOME} ${HIVE_HOME} -RUN ls -la ${HIVE_HOME}/bin -#RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} -#ADD packages/${HIVE_PACKAGE_NAME} ${HIVE_HOME}/ ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ################################################################################ @@ -151,13 +142,6 @@ RUN chown -R mysql:mysql /var/lib/mysql RUN usermod -d /var/lib/mysql/ mysql RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/mysqld.cnf -################################################################################ -# add mysql jdbc driver -# FIXME: already copied at installation of HIVE_HOME ? -#COPY --from=packages ${HIVE_HOME}/lib/* ${HIVE_HOME}/lib/ -#RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} -# ADD packages/${JDBC_DIVER_PACKAGE_NAME} ${HIVE_HOME}/lib/ - ################################################################################ # add users and groups RUN groupadd hdfs && groupadd hadoop && groupadd hive && groupadd mapred @@ -173,11 +157,6 @@ RUN usermod -a -G mapred datastrato RUN mkdir /home/datastrato RUN chown -R datastrato:hadoop /home/datastrato -################################################################################ -# removed install packages and cache -#RUN rm -rf /tmp/packages -#RUN rm -rf /var/lib/apt/lists/* - ################################################################################ # expose port EXPOSE 3306 9000 9083 10000 10002 50070 50075 50010 From 193921d88d3bef52c8986ce7affe4fe6476721e2 Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 5 May 2024 11:29:03 +0800 Subject: [PATCH 04/10] feat(hive): remove unused debug log --- dev/docker/hive/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 917cd7e90b..7fcd7aa6cd 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -20,12 +20,10 @@ ENV HADOOP_HOME=/usr/local/hadoop RUN mkdir ${HADOOP_HOME} RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} -RUN ls -la ${HADOOP_HOME} # hive RUN mkdir ${HIVE_HOME} RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} -RUN ls -la ${HIVE_HOME}/bin # add mysql jdbc driver RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} From 52b59398b3358992fb5b37a4c253b3fe0ed4b62b Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 5 May 2024 16:43:48 +0800 Subject: [PATCH 05/10] feat(hive): use latest tag --- .../gravitino/integration/test/container/ContainerSuite.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java index f0ee590b93..841fe80a73 100644 --- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java @@ -88,7 +88,7 @@ public void startHiveContainer() { HiveContainer.Builder hiveBuilder = HiveContainer.builder() .withHostName("gravitino-ci-hive") - .withImage("unknowntpo/gravitino-ci-hive:shrink") + .withImage("unknowntpo/gravitino-ci-hive:latest") .withEnvVars( ImmutableMap.builder() .put("HADOOP_USER_NAME", "datastrato") From 332004cff04e76d328afbb678ad3a8c7ed3ca198 Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Fri, 10 May 2024 21:40:42 +0800 Subject: [PATCH 06/10] feat(hive:Dockerfile): rollback to old Dockerfile --- dev/docker/hive/Dockerfile | 42 ++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 7fcd7aa6cd..41c0e90180 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -3,33 +3,14 @@ # This software is licensed under the Apache License version 2. # -FROM ubuntu:16.04 AS packages +FROM ubuntu:16.04 +LABEL maintainer="support@datastrato.com" ARG HADOOP_PACKAGE_NAME ARG HIVE_PACKAGE_NAME ARG JDBC_DIVER_PACKAGE_NAME ARG DEBIAN_FRONTEND=noninteractive -COPY packages /tmp/packages - -ENV JAVA_HOME=/usr/local/jdk -ENV HIVE_HOME=/usr/local/hive -ENV HADOOP_HOME=/usr/local/hadoop - -# hadoop -RUN mkdir ${HADOOP_HOME} -RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} && rm -rf /tmp/packages/${HADOOP_PACKAGE_NAME} - -# hive -RUN mkdir ${HIVE_HOME} -RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} && rm -rf /tmp/packages/${HIVE_PACKAGE_NAME} - -# add mysql jdbc driver -RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} && rm -rf /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} - -FROM ubuntu:16.04 -LABEL maintainer="support@datastrato.com" - WORKDIR / ################################################################################ @@ -64,6 +45,8 @@ RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \ RUN mkdir /root/.ssh RUN cat /dev/zero | ssh-keygen -q -N "" > /dev/null && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys +COPY packages /tmp/packages + ################################################################################ # set environment variables ENV JAVA_HOME=/usr/local/jdk @@ -107,7 +90,8 @@ RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment ################################################################################ # install hadoop -COPY --from=packages ${HADOOP_HOME} ${HADOOP_HOME} +RUN mkdir ${HADOOP_HOME} +RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} # replace configuration templates RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml @@ -126,7 +110,8 @@ RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive ################################################################################ # install hive -COPY --from=packages ${HIVE_HOME} ${HIVE_HOME} +RUN mkdir ${HIVE_HOME} +RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ################################################################################ @@ -134,12 +119,16 @@ ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ENV MYSQL_PWD=ds123 RUN echo "mysql-server mysql-server/root_password password ${MYSQL_PWD}" | debconf-set-selections RUN echo "mysql-server mysql-server/root_password_again password ${MYSQL_PWD}" | debconf-set-selections -RUN apt-get install -y mysql-server && rm -rf /var/lib/apt/lists/* +RUN apt-get install -y mysql-server RUN chown -R mysql:mysql /var/lib/mysql RUN usermod -d /var/lib/mysql/ mysql RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/mysqld.cnf +################################################################################ +# add mysql jdbc driver +RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} + ################################################################################ # add users and groups RUN groupadd hdfs && groupadd hadoop && groupadd hive && groupadd mapred @@ -155,6 +144,11 @@ RUN usermod -a -G mapred datastrato RUN mkdir /home/datastrato RUN chown -R datastrato:hadoop /home/datastrato +################################################################################ +# removed install packages and cache +RUN rm -rf /tmp/packages +RUN rm -rf /var/lib/apt/lists/* + ################################################################################ # expose port EXPOSE 3306 9000 9083 10000 10002 50070 50075 50010 From daad34fb928a2efb69fa3236cc33565eb8d5ade2 Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sat, 11 May 2024 15:13:22 +0800 Subject: [PATCH 07/10] feat(hive): use ln --- dev/docker/build-docker.sh | 2 +- dev/docker/hive/Dockerfile | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/dev/docker/build-docker.sh b/dev/docker/build-docker.sh index 9422c5a252..067dbd214f 100755 --- a/dev/docker/build-docker.sh +++ b/dev/docker/build-docker.sh @@ -73,7 +73,7 @@ fi if [[ "${component_type}" == "hive" ]]; then . ${script_dir}/hive/hive-dependency.sh - build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --build-arg JDBC_DIVER_PACKAGE_NAME=${JDBC_DIVER_PACKAGE_NAME}" + build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --build-arg JDBC_DIVER_PACKAGE_NAME=${JDBC_DIVER_PACKAGE_NAME} --build-arg HADOOP_VERSION=${HADOOP_VERSION} --build-arg HIVE_VERSION=${HIVE_VERSION} --build-arg MYSQL_JDBC_DRIVER_VERSION=${MYSQL_JDBC_DRIVER_VERSION}" elif [[ "${component_type}" == "kerberos-hive" ]]; then . ${script_dir}/kerberos-hive/hive-dependency.sh build_args="--build-arg HADOOP_PACKAGE_NAME=${HADOOP_PACKAGE_NAME} --build-arg HIVE_PACKAGE_NAME=${HIVE_PACKAGE_NAME} --build-arg JDBC_DIVER_PACKAGE_NAME=${JDBC_DIVER_PACKAGE_NAME}" diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 41c0e90180..5c1906651d 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -7,8 +7,11 @@ FROM ubuntu:16.04 LABEL maintainer="support@datastrato.com" ARG HADOOP_PACKAGE_NAME +ARG HADOOP_VERSION ARG HIVE_PACKAGE_NAME +ARG HIVE_VERSION ARG JDBC_DIVER_PACKAGE_NAME +ARG MYSQL_JDBC_DRIVER_VERSION ARG DEBIAN_FRONTEND=noninteractive WORKDIR / @@ -41,12 +44,10 @@ RUN apt-get update && apt-get upgrade -y && apt-get install --fix-missing -yq \ openjdk-8-jdk ################################################################################# -## setup ssh +# setup ssh RUN mkdir /root/.ssh RUN cat /dev/zero | ssh-keygen -q -N "" > /dev/null && cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys -COPY packages /tmp/packages - ################################################################################ # set environment variables ENV JAVA_HOME=/usr/local/jdk @@ -91,7 +92,8 @@ RUN echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> /etc/environment ################################################################################ # install hadoop RUN mkdir ${HADOOP_HOME} -RUN tar -xz -C ${HADOOP_HOME} --strip-components 1 -f /tmp/packages/${HADOOP_PACKAGE_NAME} +ADD packages/hadoop-${HADOOP_VERSION}.tar.gz /opt/ +RUN ln -s /opt/hadoop-${HADOOP_VERSION}/* ${HADOOP_HOME} # replace configuration templates RUN rm -f ${HADOOP_CONF_DIR}/core-site.xml @@ -108,10 +110,12 @@ ADD check-status.sh /tmp/check-status.sh # format HFS RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive -################################################################################ +################################################################################# # install hive RUN mkdir ${HIVE_HOME} -RUN tar -xz -C ${HIVE_HOME} --strip-components 1 -f /tmp/packages/${HIVE_PACKAGE_NAME} +ADD packages/apache-hive-${HIVE_VERSION}-bin.tar.gz /opt/ +RUN ln -s /opt/apache-hive-${HIVE_VERSION}-bin/* ${HIVE_HOME} + ADD hive-site.xml ${HIVE_HOME}/conf/hive-site.xml ################################################################################ @@ -127,7 +131,9 @@ RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/ ################################################################################ # add mysql jdbc driver -RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} +#RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} +ADD packages/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz /opt/ +RUN ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/* ${HIVE_HOME}/lib ################################################################################ # add users and groups From 47a4149d7f13d5b5edab82e5d7a13f6c4f0020db Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 12 May 2024 10:23:19 +0800 Subject: [PATCH 08/10] feat(hive): minor tweaks --- dev/docker/hive/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 5c1906651d..2e3b535b71 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -6,9 +6,7 @@ FROM ubuntu:16.04 LABEL maintainer="support@datastrato.com" -ARG HADOOP_PACKAGE_NAME ARG HADOOP_VERSION -ARG HIVE_PACKAGE_NAME ARG HIVE_VERSION ARG JDBC_DIVER_PACKAGE_NAME ARG MYSQL_JDBC_DRIVER_VERSION @@ -131,7 +129,6 @@ RUN sed -i "s/.*bind-address.*/bind-address = 0.0.0.0/" /etc/mysql/mysql.conf.d/ ################################################################################ # add mysql jdbc driver -#RUN tar -xz -C ${HIVE_HOME}/lib --strip-components 1 -f /tmp/packages/${JDBC_DIVER_PACKAGE_NAME} ADD packages/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}.tar.gz /opt/ RUN ln -s /opt/mysql-connector-java-${MYSQL_JDBC_DRIVER_VERSION}/* ${HIVE_HOME}/lib From 4c75621fcd27be9b95b874e16cad80d79c37e452 Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Sun, 12 May 2024 11:39:51 +0800 Subject: [PATCH 09/10] feat(hive): minor tweaks --- dev/docker/hive/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/docker/hive/Dockerfile b/dev/docker/hive/Dockerfile index 2e3b535b71..5aa0a2d01b 100644 --- a/dev/docker/hive/Dockerfile +++ b/dev/docker/hive/Dockerfile @@ -108,7 +108,7 @@ ADD check-status.sh /tmp/check-status.sh # format HFS RUN ${HADOOP_HOME}/bin/hdfs namenode -format -nonInteractive -################################################################################# +################################################################################ # install hive RUN mkdir ${HIVE_HOME} ADD packages/apache-hive-${HIVE_VERSION}-bin.tar.gz /opt/ From acf79aac27ca776a8d4ecb894ad5f9507d738f93 Mon Sep 17 00:00:00 2001 From: unknowntpo Date: Wed, 22 May 2024 12:33:43 +0800 Subject: [PATCH 10/10] change hive image back to gravitino-ci-hive, update changelog --- catalogs/catalog-hadoop/build.gradle.kts | 2 +- catalogs/catalog-hive/build.gradle.kts | 2 +- catalogs/catalog-lakehouse-iceberg/build.gradle.kts | 2 +- docs/docker-image-details.md | 3 +++ .../gravitino/integration/test/container/ContainerSuite.java | 1 - integration-test/build.gradle.kts | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index 46e37babc0..3596f6e9e9 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -98,7 +98,7 @@ tasks.test { dependsOn(tasks.jar) doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.11") + environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") } val init = project.extra.get("initIntegrationTest") as (Test) -> Unit diff --git a/catalogs/catalog-hive/build.gradle.kts b/catalogs/catalog-hive/build.gradle.kts index 179ae2ada8..bf01ee5fc0 100644 --- a/catalogs/catalog-hive/build.gradle.kts +++ b/catalogs/catalog-hive/build.gradle.kts @@ -164,7 +164,7 @@ tasks.test { dependsOn(tasks.jar) doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.11") + environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") } val init = project.extra.get("initIntegrationTest") as (Test) -> Unit diff --git a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts index 56100fe2db..2a846d3651 100644 --- a/catalogs/catalog-lakehouse-iceberg/build.gradle.kts +++ b/catalogs/catalog-lakehouse-iceberg/build.gradle.kts @@ -162,7 +162,7 @@ tasks.test { dependsOn(tasks.jar) doFirst { - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.11") + environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") } val init = project.extra.get("initIntegrationTest") as (Test) -> Unit diff --git a/docs/docker-image-details.md b/docs/docker-image-details.md index 473353fa1a..f5373bb400 100644 --- a/docs/docker-image-details.md +++ b/docs/docker-image-details.md @@ -100,6 +100,9 @@ You can use this kind of image to test the catalog of Apache Hive. Changelog +- gravitino-ci-hive:0.1.12 + - Shrink hive Docker image size by 420MB + - gravitino-ci-hive:0.1.11 - Remove `yarn` from the startup script; Remove `yarn-site.xml` and `yarn-env.sh` files; - Change the value of `mapreduce.framework.name` from `yarn` to `local` in the `mapred-site.xml` file. diff --git a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java index 841fe80a73..22b10306ab 100644 --- a/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java +++ b/integration-test-common/src/test/java/com/datastrato/gravitino/integration/test/container/ContainerSuite.java @@ -88,7 +88,6 @@ public void startHiveContainer() { HiveContainer.Builder hiveBuilder = HiveContainer.builder() .withHostName("gravitino-ci-hive") - .withImage("unknowntpo/gravitino-ci-hive:latest") .withEnvVars( ImmutableMap.builder() .put("HADOOP_USER_NAME", "datastrato") diff --git a/integration-test/build.gradle.kts b/integration-test/build.gradle.kts index dc464d48d4..d5a18edf03 100644 --- a/integration-test/build.gradle.kts +++ b/integration-test/build.gradle.kts @@ -160,7 +160,7 @@ tasks.test { doFirst { // Gravitino CI Docker image - environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.11") + environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") environment("GRAVITINO_CI_TRINO_DOCKER_IMAGE", "datastrato/gravitino-ci-trino:0.1.5") environment("GRAVITINO_CI_KAFKA_DOCKER_IMAGE", "apache/kafka:3.7.0") environment("GRAVITINO_CI_DORIS_DOCKER_IMAGE", "datastrato/gravitino-ci-doris:0.1.3")