diff --git a/plugins/spark/v3.5/getting-started/docker-compose.yml b/plugins/spark/v3.5/getting-started/docker-compose.yml
index e010d4a92c..fd7003936c 100644
--- a/plugins/spark/v3.5/getting-started/docker-compose.yml
+++ b/plugins/spark/v3.5/getting-started/docker-compose.yml
@@ -54,4 +54,4 @@ services:
       AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY
       POLARIS_HOST: polaris
     volumes:
-      - ./notebooks:/home/jovyan/notebooks
+      - ./notebooks:/home/spark/notebooks
diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
index 3254ebb551..9af0b5fd36 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
+++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile
@@ -17,31 +17,42 @@
 # under the License.
 #
 
-FROM jupyter/all-spark-notebook:spark-3.5.0
+FROM docker.io/apache/spark:3.5.6-java17
 
-ENV LANGUAGE='en_US:en'
+# Make Spark's bundled PySpark and py4j sources, plus the venv's site-packages, importable.
+ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages"
+ENV PYSPARK_PYTHON=/home/spark/venv/bin/python
 
 USER root
 
-# Generic table support requires delta 3.2.1
-# Install Spark 3.5.6
-RUN wget -q https://www.apache.org/dyn/closer.lua/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz?action=download \
-    && tar -xzf spark-3.5.6-bin-hadoop3.tgz \
-    && mv spark-3.5.6-bin-hadoop3 /opt/spark \
-    && rm spark-3.5.6-bin-hadoop3.tgz
+# python3-venv is needed to create the virtualenv built further down.
+# The apt package lists are removed in the same layer so they never reach the image.
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends python3-venv && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir -p /home/spark && \
+    chown -R spark /home/spark
 
-# Set environment variables
-ENV SPARK_HOME=/opt/spark
-ENV PATH=$SPARK_HOME/bin:$PATH
+USER spark
 
-USER jovyan
+WORKDIR /home/spark
 
-COPY --chown=jovyan client /home/jovyan/client
-COPY --chown=jovyan regtests/requirements.txt /tmp
-COPY --chown=jovyan plugins/spark/v3.5/spark/build/2.12/libs /home/jovyan/polaris_libs
-RUN pip install -r /tmp/requirements.txt
-RUN cd client/python && poetry lock && \
-    python3 -m poetry install && \
-    pip install -e .
+COPY --chown=spark client /home/spark/client
+COPY --chown=spark plugins/spark/v3.5/getting-started/notebooks/requirements.txt /tmp/notebook_requirements.txt
+COPY --chown=spark regtests/requirements.txt /tmp
+COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs
 
-WORKDIR /home/jovyan/
+# bash is required for the `source` builtin used to activate the venv below.
+SHELL ["/bin/bash", "-c"]
+
+RUN python3 -m venv /home/spark/venv && \
+    source /home/spark/venv/bin/activate && \
+    pip install --no-cache-dir -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \
+    cd client/python && \
+    poetry lock && \
+    poetry install --all-extras
+
+EXPOSE 8888
+# Exec form runs without a shell, so quotes are passed literally: spell the empty token as `token=`.
+# NOTE: an empty token disables token authentication; only publish port 8888 to trusted hosts.
+CMD ["/home/spark/venv/bin/jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--IdentityProvider.token="]
diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
index de1833e93a..8e2be2918e 100644
--- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
+++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb
@@ -839,7 +839,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.10.12"
   },
   "toc-autonumbering": false,
   "toc-showmarkdowntxt": false,
diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt
new file mode 100644
index 0000000000..1aaa3f13b9
--- /dev/null
+++ b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+jupyterlab==4.4.10
+ipykernel==7.0.1