From 9a66e27f799b44d885ee15c89fa20ef8c492338f Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 13:58:57 -0500 Subject: [PATCH 1/5] Use official spark image --- .../v3.5/getting-started/docker-compose.yml | 2 +- .../v3.5/getting-started/notebooks/Dockerfile | 45 +++++++++++-------- .../notebooks/SparkPolaris.ipynb | 2 +- .../notebooks/requirements.txt | 2 + 4 files changed, 30 insertions(+), 21 deletions(-) create mode 100644 plugins/spark/v3.5/getting-started/notebooks/requirements.txt diff --git a/plugins/spark/v3.5/getting-started/docker-compose.yml b/plugins/spark/v3.5/getting-started/docker-compose.yml index e010d4a92c..fd7003936c 100644 --- a/plugins/spark/v3.5/getting-started/docker-compose.yml +++ b/plugins/spark/v3.5/getting-started/docker-compose.yml @@ -54,4 +54,4 @@ services: AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY POLARIS_HOST: polaris volumes: - - ./notebooks:/home/jovyan/notebooks + - ./notebooks:/home/spark/notebooks diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 3254ebb551..80cd43cc5d 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -17,31 +17,38 @@ # under the License. # -FROM jupyter/all-spark-notebook:spark-3.5.0 +FROM docker.io/apache/spark:3.5.6-java17 ENV LANGUAGE='en_US:en' +ENV SPARK_HOME=/opt/spark +ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages:$PYTHONPATH" +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python +ENV POETRY_VIRTUALENVS_CREATE=false USER root -# Generic table support requires delta 3.2.1 -# Install Spark 3.5.6 -RUN wget -q https://www.apache.org/dyn/closer.lua/spark/spark-3.5.6/spark-3.5.6-bin-hadoop3.tgz?action=download \ - && tar -xzf spark-3.5.6-bin-hadoop3.tgz \ - && mv spark-3.5.6-bin-hadoop3 /opt/spark \ - && rm spark-3.5.6-bin-hadoop3.tgz +RUN apt-get update -y && \ + apt-get install -y python3-venv && \ + mkdir -p /home/spark && \ + chown -R spark /home/spark -# Set environment variables -ENV SPARK_HOME=/opt/spark -ENV PATH=$SPARK_HOME/bin:$PATH +USER spark + +WORKDIR /home/spark + +COPY --chown=spark client /home/spark/client +COPY --chown=spark plugins/spark/v3.5/getting-started/notebooks/requirements.txt /tmp/notebook_requirements.txt +COPY --chown=spark regtests/requirements.txt /tmp +COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs -USER jovyan +SHELL ["/bin/bash", "-c"] -COPY --chown=jovyan client /home/jovyan/client -COPY --chown=jovyan regtests/requirements.txt /tmp -COPY --chown=jovyan plugins/spark/v3.5/spark/build/2.12/libs /home/jovyan/polaris_libs -RUN pip install -r /tmp/requirements.txt -RUN cd client/python && poetry lock && \ - python3 -m poetry install && \ - pip install -e . +RUN python3 -m venv /home/spark/venv && \ + source /home/spark/venv/bin/activate && \ + pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \ + cd client/python && \ + poetry lock && \ + poetry install --all-extras -WORKDIR /home/jovyan/ +EXPOSE 8888 +CMD ["/home/spark/venv/bin/jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--NotebookApp.token=''"] diff --git a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb index de1833e93a..8e2be2918e 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb +++ b/plugins/spark/v3.5/getting-started/notebooks/SparkPolaris.ipynb @@ -839,7 +839,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.12" }, "toc-autonumbering": false, "toc-showmarkdowntxt": false, diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt new file mode 100644 index 0000000000..b71d4b96b8 --- /dev/null +++ b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt @@ -0,0 +1,2 @@ +jupyterlab==4.4.10 +ipykernel==7.0.1 From eba921882e54e15d298d41b880de494500dd1dcd Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 14:11:55 -0500 Subject: [PATCH 2/5] Use official spark image --- .../notebooks/requirements.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt index b71d4b96b8..1aaa3f13b9 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/requirements.txt +++ b/plugins/spark/v3.5/getting-started/notebooks/requirements.txt @@ -1,2 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + jupyterlab==4.4.10 ipykernel==7.0.1 From 3e78bf4f995f60b87d4fe3702f8c0a5063b54e66 Mon Sep 17 00:00:00 2001 From: Yong Date: Sat, 25 Oct 2025 14:38:56 -0500 Subject: [PATCH 3/5] Use official spark image --- plugins/spark/v3.5/getting-started/notebooks/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 80cd43cc5d..9af0b5fd36 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -19,11 +19,8 @@ FROM docker.io/apache/spark:3.5.6-java17 -ENV LANGUAGE='en_US:en' -ENV SPARK_HOME=/opt/spark -ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages:$PYTHONPATH" +ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" ENV PYSPARK_PYTHON=/home/spark/venv/bin/python -ENV POETRY_VIRTUALENVS_CREATE=false USER root From 20d30c23c51e4433294046ec6a31a27927df333a Mon Sep 17 00:00:00 2001 From: Yong Date: Thu, 30 Oct 2025 00:08:29 -0500 Subject: [PATCH 4/5] Use official spark image --- getting-started/spark/docker-compose.yml | 2 +- getting-started/spark/notebooks/Dockerfile | 35 ++++++++++++++----- .../spark/notebooks/SparkPolaris.ipynb | 2 +- .../v3.5/getting-started/notebooks/Dockerfile | 4 +-- regtests/notebook_requirements.txt | 21 +++++++++++ 5 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 regtests/notebook_requirements.txt diff --git a/getting-started/spark/docker-compose.yml b/getting-started/spark/docker-compose.yml index 293c67d30d..f6a57795dc 100644 --- a/getting-started/spark/docker-compose.yml +++ b/getting-started/spark/docker-compose.yml @@ -52,4 +52,4 @@ services: AWS_REGION: us-west-2 POLARIS_HOST: polaris volumes: - - ./notebooks:/home/jovyan/notebooks + - ./notebooks:/home/spark/notebooks diff --git a/getting-started/spark/notebooks/Dockerfile b/getting-started/spark/notebooks/Dockerfile index def61a4893..32ee4067b6 100644 --- a/getting-started/spark/notebooks/Dockerfile +++ b/getting-started/spark/notebooks/Dockerfile @@ -17,15 +17,32 @@ # under the License. # -FROM jupyter/all-spark-notebook:spark-3.5.0 +FROM docker.io/apache/spark:3.5.6-java17 -ENV LANGUAGE='en_US:en' +ENV PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:/home/spark/venv/lib/python3.10/site-packages" +ENV PYSPARK_PYTHON=/home/spark/venv/bin/python -COPY --chown=jovyan client /home/jovyan/client -COPY --chown=jovyan regtests/requirements.txt /tmp -RUN pip install -r /tmp/requirements.txt && \ - cd client/python && poetry lock && \ - python3 -m poetry install && \ - pip install -e . +USER root -WORKDIR /home/jovyan/ +RUN apt-get update -y && \ + apt-get install -y python3-venv && \ + mkdir -p /home/spark && \ + chown -R spark /home/spark + +USER spark + +WORKDIR /home/spark + +COPY --chown=spark client /home/spark/client +COPY --chown=spark regtests/requirements.txt /tmp +COPY --chown=spark regtests/notebook_requirements.txt /tmp + +RUN python3 -m venv /home/spark/venv && \ + . /home/spark/venv/bin/activate && \ + pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \ + cd client/python && \ + poetry lock && \ + poetry install --all-extras + +EXPOSE 8888 +CMD ["/home/spark/venv/bin/jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--NotebookApp.token=''"] diff --git a/getting-started/spark/notebooks/SparkPolaris.ipynb b/getting-started/spark/notebooks/SparkPolaris.ipynb index 0e55137974..4eb04a7c58 100644 --- a/getting-started/spark/notebooks/SparkPolaris.ipynb +++ b/getting-started/spark/notebooks/SparkPolaris.ipynb @@ -792,7 +792,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.12" }, "toc-autonumbering": false, "toc-showmarkdowntxt": false, diff --git a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile index 9af0b5fd36..06947d85a5 100644 --- a/plugins/spark/v3.5/getting-started/notebooks/Dockerfile +++ b/plugins/spark/v3.5/getting-started/notebooks/Dockerfile @@ -34,14 +34,14 @@ USER spark WORKDIR /home/spark COPY --chown=spark client /home/spark/client -COPY --chown=spark plugins/spark/v3.5/getting-started/notebooks/requirements.txt /tmp/notebook_requirements.txt COPY --chown=spark regtests/requirements.txt /tmp +COPY --chown=spark regtests/notebook_requirements.txt /tmp COPY --chown=spark plugins/spark/v3.5/spark/build/2.12/libs /home/spark/polaris_libs SHELL ["/bin/bash", "-c"] RUN python3 -m venv /home/spark/venv && \ - source /home/spark/venv/bin/activate && \ + . /home/spark/venv/bin/activate && \ pip install -r /tmp/requirements.txt -r /tmp/notebook_requirements.txt && \ cd client/python && \ poetry lock && \ diff --git a/regtests/notebook_requirements.txt b/regtests/notebook_requirements.txt new file mode 100644 index 0000000000..1aaa3f13b9 --- /dev/null +++ b/regtests/notebook_requirements.txt @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +jupyterlab==4.4.10 +ipykernel==7.0.1 From 2a45d72878ee5ba9d21d3248059703d3fe34a14d Mon Sep 17 00:00:00 2001 From: Yong Date: Thu, 30 Oct 2025 00:13:49 -0500 Subject: [PATCH 5/5] Use official spark image --- regtests/notebook_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regtests/notebook_requirements.txt b/regtests/notebook_requirements.txt index 1aaa3f13b9..43dd867881 100644 --- a/regtests/notebook_requirements.txt +++ b/regtests/notebook_requirements.txt @@ -18,4 +18,4 @@ # jupyterlab==4.4.10 -ipykernel==7.0.1 +ipykernel==7.1.0