From 259c852f5fc56ed3f07efbe4693748419bc10934 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Tue, 5 Nov 2024 17:12:17 +0100 Subject: [PATCH 1/6] Use matrix parameters to simplify job definition --- .circleci/config.yml | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1d5f8a7..23797ec 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,30 +82,24 @@ workflows: build: jobs: - build-and-push-python: - name: Python 3.8 - python-version: "3.8" - - build-and-push-python: - name: Python 3.9 - python-version: "3.9" - - build-and-push-python: - name: Python 3.10 - python-version: "3.10" - - build-and-push-python: - name: Python 3.11 - python-version: "3.11" + name: Python << matrix.python-version >> + matrix: + parameters: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" - build-and-push-python-datascience: - name: Python Datascience 3.8 - python-version: "3.8" - - build-and-push-python-datascience: - name: Python Datascience 3.9 - python-version: "3.9" - - build-and-push-python-datascience: - name: Python Datascience 3.10 - python-version: "3.10" - - build-and-push-python-datascience: - name: Python Datascience 3.11 - python-version: "3.11" + name: Python Datascience << matrix.python-version >> + matrix: + parameters: + python-version: + - "3.8" + - "3.9" + - "3.10" + - "3.11" - build-and-push-gpu: name: Tensorflow 2.9 From dcd4861ba60080510da5d2f31bc300a627da2eaf Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 6 Nov 2024 18:27:36 +0100 Subject: [PATCH 2/6] Remove not needed Dockerfile --- python/Dockerfile | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 python/Dockerfile diff --git a/python/Dockerfile b/python/Dockerfile deleted file mode 100644 index 1d98fad..0000000 --- a/python/Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -ARG FROM_PYTHON_VERSION=3.7 -ARG DEBIAN_VERSION=buster -FROM 
python:${FROM_PYTHON_VERSION}-slim-${DEBIAN_VERSION} -ENV DEBIAN_FRONTEND noninteractive - -# Install OS dependencies -RUN apt-get update && apt-get -yq dist-upgrade \ - && apt-get install -yq --no-install-recommends \ - build-essential \ - bzip2 \ - cmake \ - curl \ - git \ - graphviz \ - libgtk2.0-dev \ - locales \ - sudo \ - unzip \ - vim \ - wget \ - ssh \ - gnupg2 \ - && rm -rf /var/lib/apt/lists/* - -# We create the virtual environment in the home directory in the Dockerfile -# for performance improvement. -RUN python -m venv --system-site-packages ~/venv - -ENV SHELL=/bin/bash \ - LC_ALL=en_US.UTF-8 \ - LANG=en_US.UTF-8 \ - LANGUAGE=en_US.UTF-8 \ - DEEPNOTE_PYTHON_KERNEL_ONLY=true - -RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen From cf6c0864d701df094ff5d2a7f2818126f16df41f Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 6 Nov 2024 18:28:47 +0100 Subject: [PATCH 3/6] Make more layers of python image reusable * by first installing system dependencies and then python Setting python version incl. 
patch Fix python version env var Improve readability of docker build output Use PR-base-image for datascience-image in a PR Enable buildKit layer caching Fix layer caching Remove breaking cache-parameter Next attempt to get buildcache to work Fix pip installation for datascience image Tweaked order of ARGs to invalidate less layers Use all caches in datascience builds Speed up builds by not loading images unnecessarily Ensure big apt-get layer is shared Don't build gpu images for now Fix main-buildcache declaration Extracting apt-get into build-step to ensure reuse Share cache among all builds Build base image separately Strictly separate base from python image build Move build-arg so it's available --- .circleci/config.yml | 102 +++++++++++++++++----- python/base/Dockerfile | 36 -------- python/base/Dockerfile.base | 31 +++++++ python/base/Dockerfile.python | 51 +++++++++++ python/datascience/Dockerfile.datascience | 11 +-- 5 files changed, 166 insertions(+), 65 deletions(-) delete mode 100644 python/base/Dockerfile create mode 100644 python/base/Dockerfile.base create mode 100644 python/base/Dockerfile.python diff --git a/.circleci/config.yml b/.circleci/config.yml index 23797ec..8b88ad5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,29 +1,72 @@ version: 2.1 orbs: - docker: circleci/docker@2.2.0 + docker: circleci/docker@2.8 + +commands: + split_python_version: + steps: + - run: + name: Split python version + command: | + echo "export PYTHON_VERSION=$(echo $PYTHON_VERSION_PATCH | cut -d'.'
-f1,2)" >> $BASH_ENV + source $BASH_ENV + setup_buildkit_builder: + steps: + - run: + name: Setup BuildKit builder + command: | + docker buildx create \ + --name custom-builder \ + --use \ + --driver docker-container \ jobs: + build-and-push-base: + executor: docker/docker + steps: + - checkout + - setup_remote_docker + - setup_buildkit_builder + - docker/check: + use-docker-credentials-store: true + - docker/build: + step-name: Base python image (debian + apt-get deps) + path: ./python/base + dockerfile: Dockerfile.base + extra_build_args: | + --progress plain + --cache-from type=registry,ref=deepnote/python:base-buildcache,mode=max + --cache-to type=registry,ref=deepnote/python:base-buildcache,mode=max + --output type=registry,push=true + image: deepnote/python + tag: base${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} build-and-push-python: executor: docker/docker parameters: python-version: type: string environment: - PYTHON_VERSION: << parameters.python-version >> + PYTHON_VERSION_PATCH: << parameters.python-version >> steps: - checkout - setup_remote_docker - docker/check: use-docker-credentials-store: true + - split_python_version + - setup_buildkit_builder - docker/build: - step-name: Base Python image + step-name: Python image path: ./python/base - dockerfile: Dockerfile - extra_build_args: "--build-arg FROM_PYTHON_VERSION=${PYTHON_VERSION}" - image: deepnote/python - tag: ${PYTHON_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} - - docker/push: + dockerfile: Dockerfile.python + extra_build_args: | + --build-arg CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST} + --build-arg PYTHON_VERSION=${PYTHON_VERSION} + --build-arg PYTHON_VERSION_PATCH=${PYTHON_VERSION_PATCH} + --progress plain + --cache-from type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max + --cache-to type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max + --output type=registry,push=true image: deepnote/python tag: 
${PYTHON_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} build-and-push-python-datascience: @@ -32,24 +75,29 @@ jobs: python-version: type: string environment: - PYTHON_VERSION: << parameters.python-version >> + PYTHON_VERSION_PATCH: << parameters.python-version >> steps: - checkout - setup_remote_docker - docker/check: use-docker-credentials-store: true + - split_python_version + - setup_buildkit_builder - docker/build: step-name: Datascience Python image path: ./python/datascience docker-context: ./python/datascience dockerfile: Dockerfile.datascience - extra_build_args: "--build-arg FROM_PYTHON_VERSION=${PYTHON_VERSION}" + extra_build_args: | + --build-arg PYTHON_VERSION=${PYTHON_VERSION} + --build-arg CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST} + --progress plain + --cache-from type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max + --cache-from type=registry,ref=deepnote/python-datascience:${PYTHON_VERSION}-buildcache,mode=max + --cache-to type=registry,ref=deepnote/python-datascience:${PYTHON_VERSION}-buildcache,mode=max + --output type=registry,push=true image: deepnote/python-datascience tag: ${PYTHON_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} - - docker/push: - image: deepnote/python-datascience - tag: ${PYTHON_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} - build-and-push-gpu: executor: docker/docker parameters: @@ -68,10 +116,11 @@ jobs: - setup_remote_docker - docker/check: use-docker-credentials-store: true + - split_python_version - docker/build: step-name: Tensorflow GPU image path: ./gpu - extra_build_args: "--build-arg CUDA_IMAGE_TAG=${CUDA_TAG} --build-arg TF_VERSION=${TF_VERSION} --build-arg PYTHON_VER=${PY_VERSION}" + extra_build_args: "--build-arg CUDA_IMAGE_TAG=${CUDA_TAG} --build-arg TF_VERSION=${TF_VERSION} --build-arg PYTHON_VER=${PY_VERSION} --progress plain" image: deepnote/tensorflow tag: ${TF_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}}-gpu - 
docker/push: @@ -81,25 +130,30 @@ jobs: workflows: build: jobs: + - build-and-push-base: + name: Building the base image - build-and-push-python: name: Python << matrix.python-version >> matrix: parameters: python-version: - - "3.8" - - "3.9" - - "3.10" - - "3.11" - + - "3.8.19" + - "3.9.19" + - "3.10.15" + - "3.11.10" + requires: + - Building the base image - build-and-push-python-datascience: name: Python Datascience << matrix.python-version >> matrix: parameters: python-version: - - "3.8" - - "3.9" - - "3.10" - - "3.11" + - "3.8.19" + - "3.9.19" + - "3.10.15" + - "3.11.10" + requires: + - Python << matrix.python-version >> - build-and-push-gpu: name: Tensorflow 2.9 diff --git a/python/base/Dockerfile b/python/base/Dockerfile deleted file mode 100644 index 148bd69..0000000 --- a/python/base/Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -ARG FROM_PYTHON_VERSION=3.7 -ARG DEBIAN_VERSION=bullseye -FROM python:${FROM_PYTHON_VERSION}-slim-${DEBIAN_VERSION} -ENV DEBIAN_FRONTEND noninteractive - -# Install OS dependencies -RUN apt-get update && apt-get -yq dist-upgrade \ - && apt-get install -yq --no-install-recommends \ - build-essential \ - bzip2 \ - cmake \ - curl \ - git \ - graphviz \ - libgtk2.0-dev \ - locales \ - sudo \ - unzip \ - vim \ - wget \ - ssh \ - gnupg2 \ - && rm -rf /var/lib/apt/lists/* - -# We create the virtual environment in the home directory in the Dockerfile -# for performance improvement. 
-RUN python -m venv --system-site-packages ~/venv - -ENV SHELL=/bin/bash \ - LC_ALL=en_US.UTF-8 \ - LANG=en_US.UTF-8 \ - LANGUAGE=en_US.UTF-8 \ - DEEPNOTE_PYTHON_KERNEL_ONLY=true - -RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ - locale-gen diff --git a/python/base/Dockerfile.base b/python/base/Dockerfile.base new file mode 100644 index 0000000..6ba69f0 --- /dev/null +++ b/python/base/Dockerfile.base @@ -0,0 +1,31 @@ +FROM debian:bullseye-slim +ENV DEBIAN_FRONTEND noninteractive + +# Install OS dependencies +RUN apt-get update && apt-get -yq dist-upgrade \ +&& apt-get install -yq --no-install-recommends \ +build-essential \ +bzip2 \ +cmake \ +curl \ +git \ +graphviz \ +libgtk2.0-dev \ +locales \ +sudo \ +unzip \ +vim \ +wget \ +ssh \ +gnupg2 \ +&& apt-get clean \ +&& rm -rf /var/lib/apt/lists/* + +ENV SHELL=/bin/bash \ + LC_ALL=en_US.UTF-8 \ + LANG=en_US.UTF-8 \ + LANGUAGE=en_US.UTF-8 \ + DEEPNOTE_PYTHON_KERNEL_ONLY=true + +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ + locale-gen diff --git a/python/base/Dockerfile.python b/python/base/Dockerfile.python new file mode 100644 index 0000000..3a921e0 --- /dev/null +++ b/python/base/Dockerfile.python @@ -0,0 +1,51 @@ +ARG CIRCLE_PULL_REQUEST +FROM debian:bullseye-slim AS builder + +# Install dependencies for building Python +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + cmake \ + build-essential \ + libssl-dev \ + zlib1g-dev \ + libncurses5-dev \ + libncursesw5-dev \ + libreadline-dev \ + libsqlite3-dev \ + libgdbm-dev \ + libdb5.3-dev \ + libbz2-dev \ + libexpat1-dev \ + liblzma-dev \ + tk-dev \ + libffi-dev \ + uuid-dev \ + ca-certificates \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Download and extract the Python source code +WORKDIR /tmp +ARG PYTHON_VERSION_PATCH +RUN wget -qO- "https://www.python.org/ftp/python/${PYTHON_VERSION_PATCH}/Python-${PYTHON_VERSION_PATCH}.tgz" | tar xvz \ + && cd "/tmp/Python-${PYTHON_VERSION_PATCH}" \ + && ./configure 
--enable-optimizations --with-ensurepip=install \ + && make -j "$(nproc)" \ + && make altinstall + + +FROM deepnote/python:base${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} +# Copy Python from the builder stage +ARG PYTHON_VERSION +# Layers will be different between python versions from here onwards because of the build-arg + +COPY --from=builder "/usr/local/bin/python${PYTHON_VERSION}" "/usr/local/bin/python${PYTHON_VERSION}" +COPY --from=builder "/usr/local/bin/pip${PYTHON_VERSION}" "/usr/local/bin/pip${PYTHON_VERSION}" +COPY --from=builder "/usr/local/lib/python${PYTHON_VERSION}" "/usr/local/lib/python${PYTHON_VERSION}" + +RUN update-alternatives --install /usr/bin/python python "/usr/local/bin/python${PYTHON_VERSION}" 1 +RUN update-alternatives --install /usr/bin/pip pip "/usr/local/bin/pip${PYTHON_VERSION}" 1 + +# We create the virtual environment in the home directory in the Dockerfile +# for performance improvement. +RUN python -m venv --system-site-packages ~/venv diff --git a/python/datascience/Dockerfile.datascience b/python/datascience/Dockerfile.datascience index 8deb165..4e3b8cf 100644 --- a/python/datascience/Dockerfile.datascience +++ b/python/datascience/Dockerfile.datascience @@ -1,5 +1,6 @@ -ARG FROM_PYTHON_VERSION=3.8 -FROM deepnote/python:${FROM_PYTHON_VERSION} +ARG PYTHON_VERSION=3.8 +ARG CIRCLE_PULL_REQUEST +FROM deepnote/python:${PYTHON_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}} RUN pip install --upgrade pip @@ -10,11 +11,11 @@ ADD requirements-below-3.11.txt /requirements-below-3.11.txt ADD constraints.txt /constraints.txt #Determine the Python version and set the version-specifications file -ARG FROM_PYTHON_VERSION +ARG PYTHON_VERSION RUN python -m pip install --upgrade setuptools pip -RUN if [ "$(printf '%s\n' "$FROM_PYTHON_VERSION" "3.11" | sort -V | head -n1)" = "3.11" ]; then \ +RUN if [ "$(printf '%s\n' "$PYTHON_VERSION" "3.11" | sort -V | head -n1)" = "3.11" ]; then \ mv "requirements-3.11+.txt" 
"requirements.txt" \ ; else \ mv "requirements-below-3.11.txt" "requirements.txt" \ @@ -28,4 +29,4 @@ RUN apt-get update \ && apt-get purge -y gcc \ && apt-get autoremove -y \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /root/.cache \ No newline at end of file + && rm -rf /var/lib/apt/lists/* /root/.cache From bb91be7c605fe382a46bd31d77a71e5b27031dab Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Fri, 8 Nov 2024 18:12:42 +0100 Subject: [PATCH 4/6] Ensure images are for amd64 only otherwise manifests are including "architecture": "unknown" --- .circleci/config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8b88ad5..3859dc1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -36,6 +36,7 @@ jobs: dockerfile: Dockerfile.base extra_build_args: | --progress plain + --platform linux/amd64 --cache-from type=registry,ref=deepnote/python:base-buildcache,mode=max --cache-to type=registry,ref=deepnote/python:base-buildcache,mode=max --output type=registry,push=true @@ -64,6 +65,7 @@ jobs: --build-arg PYTHON_VERSION=${PYTHON_VERSION} --build-arg PYTHON_VERSION_PATCH=${PYTHON_VERSION_PATCH} --progress plain + --platform linux/amd64 --cache-from type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max --cache-to type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max --output type=registry,push=true @@ -92,6 +94,7 @@ jobs: --build-arg PYTHON_VERSION=${PYTHON_VERSION} --build-arg CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST} --progress plain + --platform linux/amd64 --cache-from type=registry,ref=deepnote/python:${PYTHON_VERSION}-buildcache,mode=max --cache-from type=registry,ref=deepnote/python-datascience:${PYTHON_VERSION}-buildcache,mode=max --cache-to type=registry,ref=deepnote/python-datascience:${PYTHON_VERSION}-buildcache,mode=max From 0d177954aed57d293da1fcae14c9f58cbf7670f4 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Mon, 11 Nov 2024 15:23:02 +0100 
Subject: [PATCH 5/6] Add missing ca-certificates to prevent errors like `certificate verify failed: unable to get local issuer certificate` --- python/base/Dockerfile.base | 1 + 1 file changed, 1 insertion(+) diff --git a/python/base/Dockerfile.base b/python/base/Dockerfile.base index 6ba69f0..04eb36d 100644 --- a/python/base/Dockerfile.base +++ b/python/base/Dockerfile.base @@ -18,6 +18,7 @@ vim \ wget \ ssh \ gnupg2 \ +ca-certificates \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* From 12228e4e09079d64a6145d6f4d96520ad3b0933e Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Mon, 18 Nov 2024 09:59:49 +0100 Subject: [PATCH 6/6] Centralize python-version list --- .circleci/config.yml | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3859dc1..b7683aa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -130,6 +130,12 @@ jobs: image: deepnote/tensorflow tag: ${TF_VERSION}${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}}-gpu +python-versions: &python-versions + - "3.8.19" + - "3.9.19" + - "3.10.15" + - "3.11.10" + workflows: build: jobs: @@ -139,22 +145,14 @@ workflows: name: Python << matrix.python-version >> matrix: parameters: - python-version: - - "3.8.19" - - "3.9.19" - - "3.10.15" - - "3.11.10" + python-version: *python-versions requires: - Building the base image - build-and-push-python-datascience: name: Python Datascience << matrix.python-version >> matrix: parameters: - python-version: - - "3.8.19" - - "3.9.19" - - "3.10.15" - - "3.11.10" + python-version: *python-versions requires: - Python << matrix.python-version >>