diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000000..d714c758aab --- /dev/null +++ b/Dockerfile @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +FROM maven:3.5-jdk-8 AS builder +COPY . /workspace/zeppelin +WORKDIR /workspace/zeppelin +# Allow npm and bower to run with root privileges +RUN echo "unsafe-perm=true" > ~/.npmrc && \ + echo '{ "allow_root": true }' > ~/.bowerrc && \ + mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular && \ + # Example which doesn't compile all interpreters + # mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!pig,!file,!flink,!ignite,!kylin,!lens' && \ + mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \ + # Removing stuff saves time, because docker creates a temporary layer + rm -rf ~/.m2 && \ + rm -rf /workspace/zeppelin/* + +FROM ubuntu:18.04 +COPY --from=builder /opt/zeppelin /opt/zeppelin diff --git a/docs/setup/deployment/docker.md b/docs/setup/deployment/docker.md index 746986d6080..702e5a2799c 100644 --- a/docs/setup/deployment/docker.md +++ 
b/docs/setup/deployment/docker.md @@ -31,10 +31,10 @@ This document contains instructions about making docker containers for Zeppelin. ### Installing Docker You need to [install docker](https://docs.docker.com/engine/installation/) on your machine. -### Running docker image +### Running docker image for Zeppelin distribution ```bash -docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin: +docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin-server:<release-version> ``` * Zeppelin will run at `http://localhost:8080`. @@ -47,7 +47,7 @@ docker run -p 8080:8080 --rm \ -v $PWD/notebook:/notebook \ -e ZEPPELIN_LOG_DIR='/logs' \ -e ZEPPELIN_NOTEBOOK_DIR='/notebook' \ ---name zeppelin apache/zeppelin: # e.g '0.7.1' +--name zeppelin apache/zeppelin-server:<release-version> # e.g '0.9.0' ``` ### Building dockerfile locally @@ -59,3 +59,27 @@ cd scripts/docker/zeppelin/bin docker build -t my-zeppelin:my-tag ./ ``` +### Build docker image for Zeppelin server & interpreters + +Starting from 0.9, Zeppelin supports running in k8s or docker, so we add the capability to +build docker images for Zeppelin server & interpreter. +Recommendation: Edit the Docker files yourself to adapt them to your needs and reduce the image size. + +First, you need to build a zeppelin-distribution docker image. +```bash +cd $ZEPPELIN_HOME +docker build -t zeppelin-distribution . +``` + +Build docker image for zeppelin server. +```bash +cd $ZEPPELIN_HOME/scripts/docker/zeppelin-server +docker build -t zeppelin-server . +``` + +Build base docker image for zeppelin interpreter. +```bash +cd $ZEPPELIN_HOME/scripts/docker/zeppelin-interpreter +docker build -t zeppelin-interpreter-base . 
+``` + diff --git a/k8s/zeppelin-server.yaml b/k8s/zeppelin-server.yaml index 11023d2c29e..c00de27f803 100644 --- a/k8s/zeppelin-server.yaml +++ b/k8s/zeppelin-server.yaml @@ -29,7 +29,7 @@ data: # If you have your ingress controller configured to connect to `zeppelin-server` service and have a domain name for it (with wildcard subdomain point the same address), you can replace serviceDomain field with your own domain. SERVICE_DOMAIN: local.zeppelin-project.org:8080 ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:2.4.5 - ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin:0.9.0-SNAPSHOT + ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin-interpreter:0.9.0-SNAPSHOT ZEPPELIN_HOME: /zeppelin ZEPPELIN_SERVER_RPC_PORTRANGE: 12320:12320 # default value of 'master' property for spark interpreter. @@ -115,7 +115,7 @@ spec: path: nginx.conf containers: - name: zeppelin-server - image: apache/zeppelin:0.9.0-SNAPSHOT + image: apache/zeppelin-server:0.9.0-SNAPSHOT command: ["sh", "-c", "$(ZEPPELIN_HOME)/bin/zeppelin.sh"] lifecycle: preStop: diff --git a/scripts/docker/zeppelin-interpreter/Dockerfile b/scripts/docker/zeppelin-interpreter/Dockerfile new file mode 100644 index 00000000000..ada1659e225 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/Dockerfile @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +FROM zeppelin-distribution:latest AS zeppelin-distribution + +FROM ubuntu:18.04 + +LABEL maintainer="Apache Software Foundation " + +ARG version="0.9.0-SNAPSHOT" + +ENV VERSION="${version}" \ + Z_HOME="/opt/zeppelin" + +RUN set -ex && \ + apt-get -y update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jre-headless wget && \ + # Cleanup + rm -rf /var/lib/apt/lists/* && \ + apt-get autoclean && \ + apt-get clean + +COPY --from=zeppelin-distribution /opt/zeppelin/bin ${Z_HOME}/bin +COPY log4j.properties ${Z_HOME}/conf/ +COPY log4j_yarn_cluster.properties ${Z_HOME}/conf/ +# Decide: +## 1) Build a huge image with all interpreters (default) +COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${Z_HOME}/interpreter +## 2) Build an image with only some interpreters +#### Copy interpreter-shaded JAR, needed for all interpreters +### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar ${Z_HOME}/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar +#### Copy specific interpreters, replace "${interpreter_name}" with your interpreter. 
Of course you can repeat the line with different interpreters +### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name} ${Z_HOME}/interpreter/${interpreter_name} + + +# Decide: Install conda to manage python and R packages +ARG miniconda_version="py37_4.8.2" +ARG miniconda_sha256="957d2f0f0701c3d1335e3b39f235d197837ad69a944fa6f5d8ad2c686b69df3b" +# Install python packages via conda +COPY python_conda_packages.txt /python_conda_packages.txt +# Some python packages are not available via conda, so we are using pip +COPY pip_packages.txt /pip_packages.txt +# Install R packages via conda +COPY r_conda_packages.txt /r_conda_packages.txt +RUN set -ex && \ + wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \ + echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \ + sha256sum --strict -c anaconda.sha256 && \ + bash miniconda.sh -b -p /opt/conda && \ + export PATH=/opt/conda/bin:$PATH && \ + conda config --set always_yes yes --set changeps1 no && \ + conda info -a && \ + conda config --add channels conda-forge && \ + conda install -y --quiet --file /python_conda_packages.txt && \ + pip install -q --no-cache-dir -r /pip_packages.txt && \ + conda install -y --quiet --file /r_conda_packages.txt && \ + # Cleanup + rm -v miniconda.sh anaconda.sha256 && \ + # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383 + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + conda clean -ay +ENV PATH=/opt/conda/bin:$PATH + +# Allow process to edit /etc/passwd, to create a user entry for zeppelin +RUN chgrp root /etc/passwd && chmod ug+rw /etc/passwd + +USER 1000 +WORKDIR ${Z_HOME} diff --git a/scripts/docker/zeppelin-interpreter/log4j.properties b/scripts/docker/zeppelin-interpreter/log4j.properties new file mode 100644 index 00000000000..8daee59d60d --- /dev/null +++ 
b/scripts/docker/zeppelin-interpreter/log4j.properties @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n diff --git a/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties b/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties new file mode 100644 index 00000000000..532fc5ef5f1 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n + diff --git a/scripts/docker/zeppelin-interpreter/pip_packages.txt b/scripts/docker/zeppelin-interpreter/pip_packages.txt new file mode 100644 index 00000000000..9123189b05e --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/pip_packages.txt @@ -0,0 +1 @@ +bkzep==0.6.1 \ No newline at end of file diff --git a/scripts/docker/zeppelin-interpreter/python_conda_packages.txt b/scripts/docker/zeppelin-interpreter/python_conda_packages.txt new file mode 100644 index 00000000000..0b8facf1544 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/python_conda_packages.txt @@ -0,0 +1,14 @@ +pycodestyle +numpy +pandas +scipy +grpcio +hvplot +protobuf +pandasql +ipython +matplotlib +ipykernel +jupyter_client +bokeh +apache-beam \ No newline at end of file diff --git a/scripts/docker/zeppelin-interpreter/r_conda_packages.txt b/scripts/docker/zeppelin-interpreter/r_conda_packages.txt new file mode 100644 index 00000000000..acb7c643063 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/r_conda_packages.txt @@ -0,0 +1,6 @@ +r-evaluate +r-base64enc +r-knitr +r-ggplot2 +r-shiny +r-googlevis \ No newline at end of file diff --git a/scripts/docker/zeppelin-server/Dockerfile b/scripts/docker/zeppelin-server/Dockerfile new file mode 100644 index 00000000000..68d103df610 --- /dev/null +++ b/scripts/docker/zeppelin-server/Dockerfile 
@@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM zeppelin-distribution:latest AS zeppelin-distribution + +FROM alpine:3.11 AS downloader + +ENV TINI_VERSION=v0.19.0 + +RUN set -ex && \ + /sbin/apk add --no-cache wget gnupg + # Install tini +RUN set -ex && \ + /usr/bin/wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-static-amd64 -O /sbin/tini && \ + /usr/bin/wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-static-amd64.asc -O /tmp/tini.asc && \ + /usr/bin/gpg --batch --keyserver keyserver.ubuntu.com --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 && \ + /usr/bin/gpg --batch --verify /tmp/tini.asc /sbin/tini && \ + /bin/chmod +x /sbin/tini && \ + /bin/rm -v /tmp/tini.asc + +# Prepare all interpreter settings for Zeppelin server +# These steps are not needed if you add only specific interpreter settings to your image +FROM alpine:3.11 AS interpreter-settings +COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter +RUN mkdir -p /opt/zeppelin/interpreter && \ + cd /tmp/interpreter && \ + find . 
-name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \; + +FROM ubuntu:18.04 +LABEL maintainer="Apache Software Foundation " + +RUN set -ex && \ + apt-get -y update && \ + # Install language and other base packages + DEBIAN_FRONTEND=noninteractive apt-get install -y language-pack-en openjdk-8-jre-headless wget && \ + # Cleanup + rm -rf /var/lib/apt/lists/* && \ + apt-get autoclean && \ + apt-get clean + +ARG version="0.9.0-SNAPSHOT" + +ENV LANG=en_US.UTF-8 \ + LC_ALL=en_US.UTF-8 \ + JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \ + VERSION="${version}" \ + Z_HOME="/opt/zeppelin" \ + ZEPPELIN_ADDR="0.0.0.0" \ + ZEPPELIN_WAR_TEMPDIR="/tmp/webapps" \ + ZEPPELIN_DEP_LOCALREPO="/tmp/local-repo" + +# Copy Zeppelin related files +COPY --from=zeppelin-distribution /opt/zeppelin/zeppelin-web-${VERSION}.war ${Z_HOME}/ +COPY --from=zeppelin-distribution /opt/zeppelin/zeppelin-web-angular-${VERSION}.war ${Z_HOME}/ +COPY --from=zeppelin-distribution /opt/zeppelin/conf ${Z_HOME}/conf +COPY --from=zeppelin-distribution /opt/zeppelin/bin ${Z_HOME}/bin +COPY --from=zeppelin-distribution /opt/zeppelin/lib ${Z_HOME}/lib +COPY --from=zeppelin-distribution /opt/zeppelin/plugins ${Z_HOME}/plugins +COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar ${Z_HOME}/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar +# copy example notebooks +COPY --from=zeppelin-distribution /opt/zeppelin/notebook ${Z_HOME}/notebook +# copy k8s files +COPY --from=zeppelin-distribution /opt/zeppelin/k8s ${Z_HOME}/k8s + +# Decide +## 1) Copy and activate all interpreters (default) +COPY --from=interpreter-settings /opt/zeppelin/interpreter ${Z_HOME}/interpreter +## 2) Copy and activate only a specific set of interpreter +# COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/spark/interpreter-setting.json ${Z_HOME}/interpreter/spark/interpreter-setting.json +# COPY --from=zeppelin-distribution 
/opt/zeppelin/interpreter/jdbc/interpreter-setting.json ${Z_HOME}/interpreter/jdbc/interpreter-setting.json +# COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/md/interpreter-setting.json ${Z_HOME}/interpreter/md/interpreter-setting.json + +COPY log4j.properties ${Z_HOME}/conf/ +COPY --from=downloader /sbin/tini /sbin/tini + +RUN mkdir -p "${Z_HOME}/logs" "${Z_HOME}/run" && \ + # Allow process to edit /etc/passwd, to create a user entry for zeppelin + chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \ + # Give access to some specific folders + chmod -R 775 "${Z_HOME}/logs" "${Z_HOME}/run" "${Z_HOME}/notebook" "${Z_HOME}/conf" + +USER 1000 + +EXPOSE 8080 + +ENTRYPOINT [ "/sbin/tini", "--" ] +WORKDIR ${Z_HOME} +CMD ["bin/zeppelin.sh"] diff --git a/scripts/docker/zeppelin-server/log4j.properties b/scripts/docker/zeppelin-server/log4j.properties new file mode 100644 index 00000000000..8daee59d60d --- /dev/null +++ b/scripts/docker/zeppelin-server/log4j.properties @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n