diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000000..d714c758aab
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Builder stage: runs the Maven build and moves the result to /opt/zeppelin
+FROM maven:3.5-jdk-8 AS builder
+# COPY instead of ADD: only a plain copy of the build context is needed here
+COPY . /workspace/zeppelin
+WORKDIR /workspace/zeppelin
+# Allow npm and bower to run with root privileges
+RUN echo "unsafe-perm=true" > ~/.npmrc && \
+    echo '{ "allow_root": true }' > ~/.bowerrc && \
+    mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular && \
+    # Example which doesn't compile all interpreters
+    # mvn -B package -DskipTests -Pbuild-distr -Pspark-3.0 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!pig,!file,!flink,!ignite,!kylin,!lens' && \
+    mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \
+    # Removing stuff saves time, because docker creates a temporary layer
+    rm -rf ~/.m2 && \
+    rm -rf /workspace/zeppelin/*
+
+# Final stage: contains only /opt/zeppelin copied from the builder stage
+FROM ubuntu:18.04
+COPY --from=builder /opt/zeppelin /opt/zeppelin
diff --git a/docs/setup/deployment/docker.md b/docs/setup/deployment/docker.md
index 746986d6080..702e5a2799c 100644
--- a/docs/setup/deployment/docker.md
+++ 
b/docs/setup/deployment/docker.md
@@ -31,10 +31,10 @@ This document contains instructions about making docker containers for Zeppelin.
 ### Installing Docker
 You need to [install docker](https://docs.docker.com/engine/installation/) on your machine.
 
-### Running docker image
+### Running docker image for Zeppelin distribution
 
 ```bash
-docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin:<release-version>
+docker run -p 8080:8080 --rm --name zeppelin apache/zeppelin-server:<release-version>
 ```
 
 * Zeppelin will run at `http://localhost:8080`.
@@ -47,7 +47,7 @@ docker run -p 8080:8080 --rm \
 -v $PWD/notebook:/notebook \
 -e ZEPPELIN_LOG_DIR='/logs' \
 -e ZEPPELIN_NOTEBOOK_DIR='/notebook' \
---name zeppelin apache/zeppelin:<release-version> # e.g '0.7.1'
+--name zeppelin apache/zeppelin-server:<release-version> # e.g '0.9.0'
 ```
 
 ### Building dockerfile locally
@@ -59,3 +59,27 @@ cd scripts/docker/zeppelin/bin
 docker build -t my-zeppelin:my-tag ./
 ```
 
+### Build docker image for Zeppelin server & interpreters
+
+Starting from 0.9, Zeppelin supports running in k8s or docker, so we added the capability to
+build docker images for Zeppelin server & interpreter.
+Recommendation: Edit the Docker files yourself to adapt them to your needs and reduce the image size.
+
+First, you need to build a zeppelin-distribution docker image.
+```bash
+cd $ZEPPELIN_HOME
+docker build -t zeppelin-distribution .
+```
+
+Build docker image for zeppelin server.
+```bash
+cd $ZEPPELIN_HOME/scripts/docker/zeppelin-server
+docker build -t zeppelin-server .
+```
+
+Build base docker image for zeppelin interpreter.
+```bash
+cd $ZEPPELIN_HOME/scripts/docker/zeppelin-interpreter
+docker build -t zeppelin-interpreter-base .
+``` + diff --git a/k8s/zeppelin-server.yaml b/k8s/zeppelin-server.yaml index 11023d2c29e..98fcf9d3bed 100644 --- a/k8s/zeppelin-server.yaml +++ b/k8s/zeppelin-server.yaml @@ -29,8 +29,8 @@ data: # If you have your ingress controller configured to connect to `zeppelin-server` service and have a domain name for it (with wildcard subdomain point the same address), you can replace serviceDomain field with your own domain. SERVICE_DOMAIN: local.zeppelin-project.org:8080 ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: spark:2.4.5 - ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin:0.9.0-SNAPSHOT - ZEPPELIN_HOME: /zeppelin + ZEPPELIN_K8S_CONTAINER_IMAGE: apache/zeppelin-interpreter:0.9.0-SNAPSHOT + ZEPPELIN_HOME: /opt/zeppelin ZEPPELIN_SERVER_RPC_PORTRANGE: 12320:12320 # default value of 'master' property for spark interpreter. SPARK_MASTER: k8s://https://kubernetes.default.svc @@ -115,7 +115,7 @@ spec: path: nginx.conf containers: - name: zeppelin-server - image: apache/zeppelin:0.9.0-SNAPSHOT + image: apache/zeppelin-server:0.9.0-SNAPSHOT command: ["sh", "-c", "$(ZEPPELIN_HOME)/bin/zeppelin.sh"] lifecycle: preStop: diff --git a/scripts/docker/zeppelin-interpreter/Dockerfile b/scripts/docker/zeppelin-interpreter/Dockerfile new file mode 100644 index 00000000000..090d1cc67dd --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/Dockerfile @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM zeppelin-distribution:latest AS zeppelin-distribution
+
+FROM ubuntu:20.04
+
+LABEL maintainer="Apache Software Foundation "
+
+ARG version="0.9.0-SNAPSHOT"
+
+ENV VERSION="${version}" \
+    Z_HOME="/opt/zeppelin"
+
+RUN set -ex && \
+    apt-get -y update && \
+    # Skip recommended extras to keep the image small; ca-certificates is listed explicitly because wget downloads over HTTPS below
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates openjdk-8-jre-headless wget && \
+    # Cleanup in the same layer: package cache first, then the apt index lists
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=zeppelin-distribution /opt/zeppelin/bin ${Z_HOME}/bin
+COPY log4j.properties ${Z_HOME}/conf/
+COPY log4j_yarn_cluster.properties ${Z_HOME}/conf/
+# Decide:
+## 1) Build a huge image with all interpreters (default)
+COPY --from=zeppelin-distribution /opt/zeppelin/interpreter ${Z_HOME}/interpreter
+## 2) Build an image with only some interpreters
+#### Copy interpreter-shaded JAR, needed for all interpreters
+### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar ${Z_HOME}/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar
+#### Copy specific interpreters, replace "${interpreter_name}" with your interpreter. Of course you can repeat the line with a different interpreter
+### COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/${interpreter_name} ${Z_HOME}/interpreter/${interpreter_name}
+
+
+# Decide: Install conda to manage python and R packages. Maybe adjust the packages in pip_packages.txt or conda_packages.txt
+ARG miniconda_version="py38_4.8.3"
+ARG miniconda_sha256="879457af6a0bf5b34b48c12de31d4df0ee2f06a8e68768e5758c3293b2daf688"
+# Install python and R packages via conda
+COPY conda_packages.txt /conda_packages.txt
+# Some python packages are not available via conda, so we are using pip
+COPY pip_packages.txt /pip_packages.txt
+RUN set -ex && \
+    wget -nv https://repo.anaconda.com/miniconda/Miniconda3-${miniconda_version}-Linux-x86_64.sh -O miniconda.sh && \
+    echo "${miniconda_sha256} miniconda.sh" > anaconda.sha256 && \
+    sha256sum --strict -c anaconda.sha256 && \
+    bash miniconda.sh -b -p /opt/conda && \
+    export PATH=/opt/conda/bin:$PATH && \
+    conda config --set always_yes yes --set changeps1 no && \
+    conda info -a && \
+    conda config --add channels conda-forge && \
+    conda install -y --quiet --file /conda_packages.txt && \
+    pip install --no-cache-dir -q -r /pip_packages.txt && \
+    # Cleanup
+    rm -v miniconda.sh anaconda.sha256 && \
+    # Cleanup based on https://github.com/ContinuumIO/docker-images/commit/cac3352bf21a26fa0b97925b578fb24a0fe8c383
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    conda clean -ay
+    # Allow to modify conda packages. This allows malicious code to be injected into other interpreter sessions, therefore it is disabled by default
+    # chmod -R ug+rwX /opt/conda
+ENV PATH=/opt/conda/bin:$PATH
+
+RUN mkdir -p "${Z_HOME}/logs" "${Z_HOME}/run" "${Z_HOME}/local-repo" && \
+    # Allow process to edit /etc/passwd, to create a user entry for zeppelin
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    # Give access to some specific folders
+    chmod -R 775 "${Z_HOME}/logs" "${Z_HOME}/run" "${Z_HOME}/local-repo"
+
+USER 1000
+WORKDIR ${Z_HOME}
diff --git a/scripts/docker/zeppelin-interpreter/conda_packages.txt b/scripts/docker/zeppelin-interpreter/conda_packages.txt
new file mode 100644
index 00000000000..95bdfa27d9b
--- /dev/null
+++ b/scripts/docker/zeppelin-interpreter/conda_packages.txt
@@ -0,0 +1,23 @@
+# python packages
+pycodestyle
+numpy
+pandas
+scipy
+grpcio
+hvplot
+protobuf
+pandasql
+ipython
+matplotlib
+ipykernel
+jupyter_client
+bokeh
+apache-beam
+
+# R packages
+r-evaluate
+r-base64enc
+r-knitr
+r-ggplot2
+r-shiny
+r-googlevis
diff --git a/scripts/docker/zeppelin-interpreter/log4j.properties b/scripts/docker/zeppelin-interpreter/log4j.properties
new file mode 100644
index 00000000000..8daee59d60d
--- /dev/null
+++ b/scripts/docker/zeppelin-interpreter/log4j.properties
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n diff --git a/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties b/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties new file mode 100644 index 00000000000..532fc5ef5f1 --- /dev/null +++ b/scripts/docker/zeppelin-interpreter/log4j_yarn_cluster.properties @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+log4j.rootLogger = INFO, stdout
+
+log4j.appender.stdout = org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n
+
diff --git a/scripts/docker/zeppelin-interpreter/pip_packages.txt b/scripts/docker/zeppelin-interpreter/pip_packages.txt
new file mode 100644
index 00000000000..9123189b05e
--- /dev/null
+++ b/scripts/docker/zeppelin-interpreter/pip_packages.txt
@@ -0,0 +1 @@
+bkzep==0.6.1
\ No newline at end of file
diff --git a/scripts/docker/zeppelin-server/Dockerfile b/scripts/docker/zeppelin-server/Dockerfile
new file mode 100644
index 00000000000..ec470e42161
--- /dev/null
+++ b/scripts/docker/zeppelin-server/Dockerfile
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM zeppelin-distribution:latest AS zeppelin-distribution
+
+# Prepare all interpreter settings for Zeppelin server
+# These steps are not needed if you add only specific interpreter settings to your image
+FROM alpine:3.11 AS interpreter-settings
+COPY --from=zeppelin-distribution /opt/zeppelin/interpreter /tmp/interpreter
+WORKDIR /tmp/interpreter
+# Copy only the interpreter-setting.json files; --parents keeps each file's relative directory path
+RUN mkdir -p /opt/zeppelin/interpreter && \
+    find . -name 'interpreter-setting.json' -exec cp --parents \{\} /opt/zeppelin/interpreter \;
+
+FROM ubuntu:20.04
+LABEL maintainer="Apache Software Foundation "
+
+RUN set -ex && \
+    apt-get -y update && \
+    # Install language and other base packages, skipping recommended extras to keep the image small
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates language-pack-en openjdk-8-jre-headless tini wget && \
+    # Cleanup in the same layer: package cache first, then the apt index lists
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ARG version="0.9.0-SNAPSHOT"
+
+ENV LANG=en_US.UTF-8 \
+    LC_ALL=en_US.UTF-8 \
+    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \
+    VERSION="${version}" \
+    Z_HOME="/opt/zeppelin" \
+    ZEPPELIN_ADDR="0.0.0.0" \
+    ZEPPELIN_WAR_TEMPDIR="/tmp/webapps"
+
+# Copy Zeppelin related files
+COPY --from=zeppelin-distribution /opt/zeppelin/zeppelin-web-${VERSION}.war ${Z_HOME}/
+COPY --from=zeppelin-distribution /opt/zeppelin/zeppelin-web-angular-${VERSION}.war ${Z_HOME}/
+COPY --from=zeppelin-distribution /opt/zeppelin/conf ${Z_HOME}/conf
+COPY --from=zeppelin-distribution /opt/zeppelin/bin ${Z_HOME}/bin
+COPY --from=zeppelin-distribution /opt/zeppelin/lib ${Z_HOME}/lib
+COPY --from=zeppelin-distribution /opt/zeppelin/plugins ${Z_HOME}/plugins
+COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar ${Z_HOME}/interpreter/zeppelin-interpreter-shaded-${VERSION}.jar
+# copy example notebooks
+COPY --from=zeppelin-distribution /opt/zeppelin/notebook ${Z_HOME}/notebook
+# copy k8s files
+COPY --from=zeppelin-distribution /opt/zeppelin/k8s ${Z_HOME}/k8s
+
+# Decide
+## 1) Copy and activate all interpreters (default)
+COPY --from=interpreter-settings /opt/zeppelin/interpreter ${Z_HOME}/interpreter
+## 2) Copy and activate only a specific set of interpreter
+# COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/spark/interpreter-setting.json ${Z_HOME}/interpreter/spark/interpreter-setting.json
+# COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/jdbc/interpreter-setting.json ${Z_HOME}/interpreter/jdbc/interpreter-setting.json
+# COPY --from=zeppelin-distribution /opt/zeppelin/interpreter/md/interpreter-setting.json ${Z_HOME}/interpreter/md/interpreter-setting.json
+
+COPY log4j.properties ${Z_HOME}/conf/
+
+RUN mkdir -p "${Z_HOME}/logs" "${Z_HOME}/run" "${Z_HOME}/notebook" "${Z_HOME}/local-repo" && \
+    # Allow process to edit /etc/passwd, to create a user entry for zeppelin
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    # Give access to some specific folders
+    chmod -R 775 "${Z_HOME}/logs" "${Z_HOME}/run" "${Z_HOME}/conf" "${Z_HOME}/notebook" "${Z_HOME}/local-repo"
+
+USER 1000
+
+EXPOSE 8080
+
+# tini is the entrypoint and receives the CMD below as its arguments; bin/zeppelin.sh is relative to WORKDIR
+ENTRYPOINT [ "/usr/bin/tini", "--" ]
+WORKDIR ${Z_HOME}
+CMD ["bin/zeppelin.sh"]
diff --git a/scripts/docker/zeppelin-server/log4j.properties b/scripts/docker/zeppelin-server/log4j.properties
new file mode 100644
index 00000000000..8daee59d60d
--- /dev/null
+++ b/scripts/docker/zeppelin-server/log4j.properties
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +log4j.rootLogger = INFO, stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n diff --git a/zeppelin-plugins/launcher/k8s-standard/src/main/java/org/apache/zeppelin/interpreter/launcher/K8sRemoteInterpreterProcess.java b/zeppelin-plugins/launcher/k8s-standard/src/main/java/org/apache/zeppelin/interpreter/launcher/K8sRemoteInterpreterProcess.java index b95cc5b0cfe..b7a4b1683c3 100644 --- a/zeppelin-plugins/launcher/k8s-standard/src/main/java/org/apache/zeppelin/interpreter/launcher/K8sRemoteInterpreterProcess.java +++ b/zeppelin-plugins/launcher/k8s-standard/src/main/java/org/apache/zeppelin/interpreter/launcher/K8sRemoteInterpreterProcess.java @@ -58,6 +58,7 @@ public class K8sRemoteInterpreterProcess extends RemoteInterpreterProcess { private static final String SPARK_DRIVER_MEMORY_OVERHEAD = "spark.driver.memoryOverhead"; private static final String SPARK_DRIVER_CORES = "spark.driver.cores"; private static final String ENV_SERVICE_DOMAIN = "SERVICE_DOMAIN"; + private static final String ENV_ZEPPELIN_HOME = "ZEPPELIN_HOME"; public K8sRemoteInterpreterProcess( KubernetesClient client, @@ -280,7 +281,7 @@ Properties getTemplateBindings(String userName) { // environment variables envs.put(ENV_SERVICE_DOMAIN, envs.getOrDefault(ENV_SERVICE_DOMAIN, System.getenv(ENV_SERVICE_DOMAIN))); - envs.put("ZEPPELIN_HOME", envs.getOrDefault("ZEPPELIN_HOME", "/zeppelin")); + envs.put(ENV_ZEPPELIN_HOME, envs.getOrDefault(ENV_ZEPPELIN_HOME, System.getenv(ENV_ZEPPELIN_HOME))); if (isSpark()) { int webUiPort = 4040;