From fc626598bc1ccf5fc2c48d3deed7ace075e20e9d Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 22 Jan 2025 11:07:49 +0100 Subject: [PATCH 1/5] Bump CUDA base-image to 12.6.3-runtime-ubuntu20.04 for python 3.8 and 3.10 and to 12.6.3-runtime-ubuntu22.04 for python 3.11 --- .circleci/config.yml | 6 +++--- gpu/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b72bd98..352295b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -316,16 +316,16 @@ workflows: - build-and-push-gpu: name: Tensorflow 2.9 - cuda-tag: "11.4.3-runtime-ubuntu20.04" + cuda-tag: "12.6.3-runtime-ubuntu20.04" tf-version: "2.9.1" python-version: "3.8" - build-and-push-gpu: name: Tensorflow 2.11 - cuda-tag: "11.4.3-runtime-ubuntu20.04" + cuda-tag: "12.6.3-runtime-ubuntu20.04" tf-version: "2.11.1" python-version: "3.10" - build-and-push-gpu: name: Tensorflow 2.15 - cuda-tag: "12.2.2-runtime-ubuntu22.04" + cuda-tag: "12.6.3-runtime-ubuntu22.04" tf-version: "2.15.0" python-version: "3.11" diff --git a/gpu/Dockerfile b/gpu/Dockerfile index 770b6dc..8e593e1 100644 --- a/gpu/Dockerfile +++ b/gpu/Dockerfile @@ -1,4 +1,4 @@ -ARG CUDA_IMAGE_TAG=11.4.3-runtime-ubuntu20.04 +ARG CUDA_IMAGE_TAG=12.6.3-runtime-ubuntu20.04 FROM nvidia/cuda:${CUDA_IMAGE_TAG} ENV DEBIAN_FRONTEND=noninteractive From 9f5fa727887e99ed5e9bb532cc24f72cdc3e5eb8 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 22 Jan 2025 17:32:05 +0100 Subject: [PATCH 2/5] Explicitly tag cuda, tf and ubuntu on GPU image --- .circleci/config.yml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 352295b..3648746 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -226,14 +226,17 @@ jobs: machine: image: ubuntu-2404:current parameters: - cuda-tag: + cuda-version: + type: string + ubuntu-version: type: string tf-version: type: string python-version: type: string environment: - CUDA_TAG: << parameters.cuda-tag >> + CUDA_VERSION: << parameters.cuda-version >> + UBUNTU_VERSION: << parameters.ubuntu-version >> TF_VERSION: << parameters.tf-version >> PY_VERSION: << parameters.python-version >> steps: @@ -244,13 +247,14 @@ jobs: name: Build and push python image command: | REPOSITORY="deepnote/tensorflow" - TAG="${TF_VERSION}-gpu" + TAG="gpu-${TF_VERSION}-${CUDA_VERSION}-${UBUNTU_VERSION}" TAG_SUFFIX="${CIRCLE_PULL_REQUEST:+-ra-${CIRCLE_PULL_REQUEST##*/}}" + CUDA_BASE_IMAGE_TAG=${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} docker buildx build \ --file=./gpu/Dockerfile \ --tag=978928340082.dkr.ecr.us-east-1.amazonaws.com/${REPOSITORY}:${TAG}${TAG_SUFFIX} \ --tag=docker.io/${REPOSITORY}:${TAG}${TAG_SUFFIX} \ - --build-arg CUDA_IMAGE_TAG=${CUDA_TAG} \ + --build-arg CUDA_IMAGE_TAG=${CUDA_BASE_IMAGE_TAG} \ --build-arg TF_VERSION=${TF_VERSION} \ --build-arg PYTHON_VER=${PY_VERSION} \ --cache-from type=registry,ref=docker.io/${REPOSITORY}:${TAG}-buildcache \ @@ -316,16 +320,19 @@ workflows: - build-and-push-gpu: name: Tensorflow 2.9 - cuda-tag: "12.6.3-runtime-ubuntu20.04" + cuda-version: "12.6.3" + ubuntu-version: "20.04" tf-version: "2.9.1" python-version: "3.8" - build-and-push-gpu: name: Tensorflow 2.11 - cuda-tag: "12.6.3-runtime-ubuntu20.04" + cuda-version: "12.6.3" + ubuntu-version: "20.04" tf-version: "2.11.1" python-version: "3.10" - build-and-push-gpu: name: Tensorflow 2.15 - cuda-tag: "12.6.3-runtime-ubuntu22.04" + cuda-version: "12.6.3" + ubuntu-version: "22.04" tf-version: "2.15.0" python-version: "3.11" From 3c3bde22fd8d95a6ec7623f69afbcf253e9db0b9 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 22 Jan 2025 17:49:13 +0100 Subject: [PATCH 3/5] Add images with older CUDA version 11.8.0 still supports k80's 470.256.02 driver https://github.com/deepnote/ops/blob/42d47784b2588910b0deb6eee7d2365a31ced4e0/aws/cluster-production-nat.yaml#L809 https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions --- .circleci/config.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3648746..6e0a9b8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -336,3 +336,22 @@ workflows: ubuntu-version: "22.04" tf-version: "2.15.0" python-version: "3.11" + + - build-and-push-gpu: + name: Tensorflow 2.9 + cuda-version: "11.8.0" + ubuntu-version: "20.04" + tf-version: "2.9.1" + python-version: "3.8" + - build-and-push-gpu: + name: Tensorflow 2.11 + cuda-version: "11.8.0" + ubuntu-version: "20.04" + tf-version: "2.11.1" + python-version: "3.10" + - build-and-push-gpu: + name: Tensorflow 2.15 + cuda-version: "11.8.0" + ubuntu-version: "22.04" + tf-version: "2.15.0" + python-version: "3.11" From dc62a06c5f3e41ccfe0cb2bc9e086d71d1a6d3d3 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Thu, 23 Jan 2025 17:43:10 +0100 Subject: [PATCH 4/5] Fix names of builds --- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6e0a9b8..47b101f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -319,38 +319,38 @@ workflows: - R << matrix.r-version >> - build-and-push-gpu: - name: Tensorflow 2.9 + name: Tensorflow 2.9, Cuda 12.6, Ubuntu 20.04 cuda-version: "12.6.3" ubuntu-version: "20.04" tf-version: "2.9.1" python-version: "3.8" - build-and-push-gpu: - name: Tensorflow 2.11 + name: Tensorflow 2.11, Cuda 12.6, Ubuntu 20.04 cuda-version: "12.6.3" ubuntu-version: "20.04" tf-version: "2.11.1" python-version: "3.10" - build-and-push-gpu: - name: Tensorflow 2.15 + name: Tensorflow 2.15, Cuda 12.6, Ubuntu 22.04 cuda-version: "12.6.3" ubuntu-version: "22.04" tf-version: "2.15.0" python-version: "3.11" - build-and-push-gpu: - name: Tensorflow 2.9 + name: Tensorflow 2.9, Cuda 11.8, Ubuntu 20.04 cuda-version: "11.8.0" ubuntu-version: "20.04" tf-version: "2.9.1" python-version: "3.8" - build-and-push-gpu: - name: Tensorflow 2.11 + name: Tensorflow 2.11, Cuda 11.8, Ubuntu 20.04 cuda-version: "11.8.0" ubuntu-version: "20.04" tf-version: "2.11.1" python-version: "3.10" - build-and-push-gpu: - name: Tensorflow 2.15 + name: Tensorflow 2.15, Cuda 11.8, Ubuntu 22.04 cuda-version: "11.8.0" ubuntu-version: "22.04" tf-version: "2.15.0" From 2d33ca8bd0e4a39bd8d20eceb3bce71d8e6866b6 Mon Sep 17 00:00:00 2001 From: Hannes Probst Date: Wed, 29 Jan 2025 20:19:31 +0100 Subject: [PATCH 5/5] Pin Conda to a Python 3.8 compatible version --- python/conda/Dockerfile.conda | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/conda/Dockerfile.conda b/python/conda/Dockerfile.conda index 1f0a341..c6f721e 100644 --- a/python/conda/Dockerfile.conda +++ b/python/conda/Dockerfile.conda @@ -17,7 +17,12 @@ ENV CONDA_ALWAYS_YES=true # but that would require baking in the URLs for # different Miniconda installer versions into the Dockerfile. ARG PYTHON_VERSION -RUN MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \ +RUN if [ "$PYTHON_VERSION" = "3.8" ]; then \ + # last version of conda compatible with python 3.8 + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py38_23.11.0-2-Linux-x86_64.sh"; \ + else \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"; \ + fi && \ wget --quiet $MINICONDA_URL -O /tmp/miniconda.sh && \ /bin/bash /tmp/miniconda.sh -b -p /opt/conda && \ rm /tmp/miniconda.sh && \