From 9a66e27f799b44d885ee15c89fa20ef8c492338f Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 13:58:57 -0500 Subject: [PATCH 1/3] Use official spark image --- .../v3.5/getting-started/docker-compose.yml | 2 +- .../v3.5/getting-started/notebooks/Dockerfile | 45 +++++++++++-------- .../notebooks/SparkPolaris.ipynb | 2 +- .../notebooks/requirements.txt | 2 + 4 files changed, 30 insertions(+), 21 deletions(-) create mode 100644 plugins/spark/v3.5/getting-started/notebooks/requirements.txt diff --git a/plugins/spark/v3.5/getting-started/docker-compose.yml b/plugins/spark/v3.5/getting-started/docker-compose.yml index e010d4a92c..fd7003936c 100644 --- a/plugins/spark/v3.5/getting-started/docker-compose.yml +++ b/plugins/spark/v3.5/getting-started/docker-compose.yml @@ -54,4 +54,4 @@ services: AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY POLARIS_HOST: polaris volumes: - - ./notebooks:/home/jovyan/notebooks + - ./notebooks:/home/spark/notebooks diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 3254ebb551..80cd43cc5d 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -17,31 +17,38 @@ # under the License. # -FROM jupyter/all-spark-notebook:spark-3.5.0 +FROM docker.io/apache/spark:3.5.6-java17 ENV LANGUAGE='en_US:en' +ENV SPARK_HOME=/opt/spark +ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages:$PYTHONPATH" +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ENV POETRY_VIRTUALENVS_CREATE=false USER root -# Generic table support requires delta 3.2.1 -# Install Spark 3.5.6 -RUN wget -q https://www.apache.org/dyn/closer.lua/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz?action=download \ - && tar -xzf spark-3.5.6-bin-hadoop3.tgz \ - && mv spark-3.5.6-bin-hadoop3 /opt/spark \ - && rm spark-3.5.6-bin-hadoop3.tgz +RUN apt-get update -y && \ + apt-get install -y python3-venv && \ + mkdir -p /home/spark && \ + chown -R spark /home/spark -# Set environment variables -ENV SPARK_HOME=/opt/spark -ENV PATH=$SPARK_HOME/bin:$PATH +USER spark + +WORKDIR /home/spark + +COPY --chown=spark client /home/spark/client +COPY --chown=spark plugins/spark/v3.5/getting-started/notebooks/requirements.txt /tmp/notebook_requirements.txt +COPY --chown=spark regtests/requirements.txt /tmp +COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs -USER jovyan +SHELL ["/bin/bash", "-c"] -COPY --chown=jovyan client /home/jovyan/client -COPY --chown=jovyan regtests/requirements.txt /tmp -COPY --chown=jovyan plugins/spark/v3.5/spark/build/2.12/libs /home/jovyan/polaris_libs -RUN pip install -r /tmp/requirements.txt -RUN cd client/python && poetry lock && \ - python3 -m poetry install && \ - pip install -e . +RUN python3 -m venv /home/spark/venv && \ + source /home/spark/venv/bin/activate && \ + pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \ + cd client/python && \ + poetry lock && \ + poetry install --all-extras -WORKDIR /home/jovyan/ +EXPOSE 8888 +CMD ["/home/spark/venv/bin/jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--NotebookApp.token=''"] diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index de1833e93a..8e2be2918e 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -839,7 +839,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.12" }, "toc-autonumbering": false, "toc-showmarkdowntxt": false, diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt new file mode 100644 index 0000000000..b71d4b96b8 --- /dev/null +++ b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt @@ -0,0 +1,2 @@ +jupyterlab==4.4.10 +ipykernel==7.0.1 From eba921882e54e15d298d41b880de494500dd1dcd Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 14:11:55 -0500 Subject: [PATCH 2/3] Use official spark image --- .../notebooks/requirements.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt index b71d4b96b8..1aaa3f13b9 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt +++ b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt @@ -1,2 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + jupyterlab==4.4.10 ipykernel==7.0.1 From 3e78bf4f995f60b87d4fe3702f8c0a5063b54e66 Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 14:38:56 -0500 Subject: [PATCH 3/3] Use official spark image --- plugins/spark/v3.5/getting-started/notebooks/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 80cd43cc5d..9af0b5fd36 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -19,11 +19,8 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV LANGUAGE='en_US:en' -ENV SPARK_HOME=/opt/spark -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages:$PYTHONPATH" +ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" ENV PYSPARK_PYTHON=/home/spark/venv/bin/python -ENV POETRY_VIRTUALENVS_CREATE=false USER root