Update Docker build to improve caching and image size (#1866)

azavea · Aug 28, 2023 · 4f0ab5f · 4f0ab5f
1 parent f62c367
commit 4f0ab5f
Show file tree

Hide file tree

Showing 3 changed files with 92 additions and 57 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,35 @@
+ARG BUILD_TYPE
 ARG CUDA_VERSION
 ARG UBUNTU_VERSION
-FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu${UBUNTU_VERSION}
+
+########################################################################
+
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu${UBUNTU_VERSION} AS thinbuild
+
+ARG PYTHON_VERSION=3.10
+
+# build-essential: installs gcc which is needed to install some deps like rasterio
+# libgl1: needed to avoid following error when using cv2
+# ImportError: libGL.so.1: cannot open shared object file: No such file or directory
+# See https://stackoverflow.com/questions/55313610/importerror-libgl-so-1-cannot-open-shared-object-file-no-such-file-or-directo
+# nodejs is installed from the NodeSource 16.x repository, and python/python3 are
+# pointed at python${PYTHON_VERSION} via update-alternatives.
+# apt-get is used because apt does not offer a stable CLI for scripts; autoremove
+# gets -y so it cannot stop at a confirmation prompt, and the package lists are
+# removed in the same layer so they do not add to the image size.
+RUN --mount=type=cache,target=/var/cache/apt apt-get update && \
+    apt-get install -y wget=1.21.2-2ubuntu1 build-essential=12.9ubuntu3 libgl1=1.4.0-1 curl=7.81.0-1ubuntu1.13 git=1:2.34.1-1ubuntu1.10 tree=2.0.2-1 gdal-bin=3.4.1+dfsg-1build4 libgdal-dev=3.4.1+dfsg-1build4 python${PYTHON_VERSION} python3-pip && \
+    curl -fsSL https://deb.nodesource.com/setup_16.x | bash - && \
+    apt-get install -y nodejs=16.20.2-deb-1nodesource1 && \
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \
+    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+########################################################################
+
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu${UBUNTU_VERSION} AS fullbuild
+
+ARG PYTHON_VERSION=3.10
+ARG TARGETPLATFORM
+
+# Put the conda installation in /opt/conda ahead of the system binaries and
+# shared libraries.
+ENV PATH=/opt/conda/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/conda/lib/:$LD_LIBRARY_PATH
+# Needed for GDAL 3.0
+ENV PROJ_LIB=/opt/conda/share/proj/
 
 # wget: needed below to install conda
 # build-essential: installs gcc which is needed to install some deps like rasterio
@@ -11,9 +40,6 @@ RUN apt-get update && \
     apt-get install -y wget=1.* build-essential libgl1 curl git tree && \
     apt-get autoremove && apt-get autoclean && apt-get clean
 
-ARG PYTHON_VERSION=3.10
-ARG TARGETPLATFORM
-
 RUN case ${TARGETPLATFORM} in \
          "linux/arm64")  LINUX_ARCH=aarch64  ;; \
          *)              LINUX_ARCH=x86_64   ;; \
@@ -44,69 +70,75 @@ ENV GDAL_DATA=/opt/conda/lib/python${PYTHON_VERSION}/site-packages/rasterio/gdal
 RUN rm /opt/conda/lib/libtinfo.so.6 && \
     ln -s /lib/$(cat /root/linux_arch)-linux-gnu/libtinfo.so.6 /opt/conda/lib/libtinfo.so.6
 
-WORKDIR /opt/src/
-
-COPY ./requirements-dev.txt /opt/src/requirements-dev.txt
-RUN pip install -r requirements-dev.txt
+# This gets rid of the following error when importing cv2 on arm64.
+# We cannot use the ENV directive since it cannot be used conditionally.
+# See https://github.com/opencv/opencv/issues/14884
+# ImportError: /lib/aarch64-linux-gnu/libGLdispatch.so.0: cannot allocate memory in static TLS block
+# TARGETARCH has to be declared in this stage to be visible to RUN. The test
+# needs spaces around "[" and "]" and uses the POSIX "=" operator so that it
+# works under /bin/sh.
+ARG TARGETARCH
+RUN if [ "${TARGETARCH}" = "arm64" ]; \
+    then echo "export LD_PRELOAD=/lib/$(cat /root/linux_arch)-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD" >> /root/.bashrc; fi
 
-# Ideally we'd just pip install each package, but if we do that, then a lot of the image
-# will have to be re-built each time we make a change to source code. So, we split the
-# install into installing all the requirements first (filtering out any prefixed with
-# rastervision_*), and then copy over the source code.
+########################################################################
 
-# Install requirements for each package.
-# -E "^\s*$|^#|rastervision_*" means exclude blank lines, comment lines,
-# and rastervision plugins.
-COPY ./rastervision_pipeline/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+FROM ${BUILD_TYPE:-fullbuild} AS final_stage
 
-COPY ./rastervision_aws_s3/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+ARG TARGETARCH
 
-COPY ./rastervision_aws_batch/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+# Needed for click to work
+ENV LC_ALL=C.UTF-8 LANG=C.UTF-8
 
-COPY ./rastervision_core/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+WORKDIR /opt/src/
 
-COPY ./rastervision_pytorch_learner/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+#------------------------------------------------------------------------
+
+# Ideally we'd just pip install each package, but if we do that, then
+# a lot of the image will have to be re-built each time we make a
+# change to the code. So, we split the install into installing all the
+# requirements in bunches (filtering out any prefixed with
+# rastervision_*), and then copy over the source code.  The
+# dependencies are installed in bunches rather than package-by-package
+# or on a per-RV component basis to reduce the build time, the number
+# of layers, and the overall image size, and to reduce churn
+# (installing and uninstalling of Python packages during the build).
+#
+# The bunches are heuristic and are meant to keep the heaviest and/or
+# least-frequently-changing dependencies before the more variable
+# ones.  At time of writing, the image size attributable to
+# dependencies (and the image size overall) is heavily dominated
+# by PyTorch, so it is installed first.
+
+# Install requirements.
+# -E "^\s*$|^#|rastervision_*" means exclude blank lines, comment lines,
+# and rastervision plugins.
 
-COPY ./rastervision_gdal_vsi/requirements.txt /opt/src/requirements.txt
-RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+COPY ./rastervision_pytorch_learner/requirements.txt /opt/src/pytorch-requirements.txt
+# De-duplicate the requirement lines, install everything except blank lines,
+# comments and rastervision packages, then discard the temporary list.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    sort -u pytorch-requirements.txt > all-requirements.txt && \
+    pip install $(grep -ivE "^\s*$|^#|rastervision_*" all-requirements.txt) && \
+    rm all-requirements.txt
 
-# Commented out because there are no non-RV deps and it will fail if uncommented.
-# COPY ./rastervision_pytorch_backend/requirements.txt /opt/src/requirements.txt
-# RUN pip install $(grep -ivE "^\s*$|^#|rastervision_*" requirements.txt)
+COPY ./rastervision_aws_batch/requirements.txt /opt/src/batch-requirements.txt
+COPY ./rastervision_aws_s3/requirements.txt /opt/src/s3-requirements.txt
+COPY ./rastervision_core/requirements.txt /opt/src/core-requirements.txt
+COPY ./rastervision_gdal_vsi/requirements.txt /opt/src/gdal-requirements.txt
+COPY ./rastervision_pipeline/requirements.txt /opt/src/pipeline-requirements.txt
+COPY ./requirements-dev.txt /opt/src/requirements-dev.txt
+# The files are handed to sort directly instead of being concatenated with cat:
+# if a file does not end in a newline, cat would fuse its last requirement with
+# the first requirement of the next file, whereas sort reads each file separately.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    sort -u batch-requirements.txt s3-requirements.txt core-requirements.txt gdal-requirements.txt pipeline-requirements.txt requirements-dev.txt > all-requirements.txt && \
+    pip install $(grep -ivE "^\s*$|^#|rastervision_*" all-requirements.txt) && \
+    rm all-requirements.txt
 
 #########################
 # Docs
 #########################
 # Install docs/requirements.txt
-COPY ./docs/requirements.txt /opt/src/docs/requirements.txt
-RUN pip install -r docs/requirements.txt
+COPY ./docs/requirements.txt /opt/src/docs/pandoc-requirements.txt
 
 # Install pandoc, needed for rendering notebooks
 # Get latest release link from here: https://github.com/jgm/pandoc/releases
-ARG TARGETARCH
-RUN wget https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-${TARGETARCH}.deb && \
+# A single RUN installs the docs' Python requirements and then downloads,
+# installs and deletes the pandoc .deb matching ${TARGETARCH}.
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r docs/pandoc-requirements.txt && \
+    wget https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-${TARGETARCH}.deb && \
     dpkg -i pandoc-2.19.2-1-${TARGETARCH}.deb && rm pandoc-2.19.2-1-${TARGETARCH}.deb
-#########################
 
-COPY scripts /opt/src/scripts/
-COPY scripts/rastervision /usr/local/bin/rastervision
-COPY tests /opt/src/tests/
-COPY integration_tests /opt/src/integration_tests/
-COPY .flake8 /opt/src/.flake8
-COPY .coveragerc /opt/src/.coveragerc
-
-# Needed for click to work
-ENV LC_ALL C.UTF-8
-ENV LANG C.UTF-8
-# Needed for GDAL 3.0
-ENV PROJ_LIB /opt/conda/share/proj/
+#------------------------------------------------------------------------
 
-# Copy code for each package.
 ENV PYTHONPATH=/opt/src:$PYTHONPATH
 ENV PYTHONPATH=/opt/src/rastervision_pipeline/:$PYTHONPATH
 ENV PYTHONPATH=/opt/src/rastervision_aws_s3/:$PYTHONPATH
@@ -116,6 +148,13 @@ ENV PYTHONPATH=/opt/src/rastervision_core/:$PYTHONPATH
 ENV PYTHONPATH=/opt/src/rastervision_pytorch_learner/:$PYTHONPATH
 ENV PYTHONPATH=/opt/src/rastervision_pytorch_backend/:$PYTHONPATH
 
+# Copy the scripts (including the rastervision launcher placed on the PATH),
+# the tests, and the flake8/coverage configuration. These copies come after
+# the dependency installs so that changing them does not invalidate those layers.
+COPY scripts /opt/src/scripts/
+COPY scripts/rastervision /usr/local/bin/rastervision
+COPY tests /opt/src/tests/
+COPY integration_tests /opt/src/integration_tests/
+COPY .flake8 /opt/src/.flake8
+COPY .coveragerc /opt/src/.coveragerc
+
 COPY ./rastervision_pipeline/ /opt/src/rastervision_pipeline/
 COPY ./rastervision_aws_s3/ /opt/src/rastervision_aws_s3/
 COPY ./rastervision_aws_batch/ /opt/src/rastervision_aws_batch/
@@ -124,11 +163,4 @@ COPY ./rastervision_pytorch_learner/ /opt/src/rastervision_pytorch_learner/
 COPY ./rastervision_pytorch_backend/ /opt/src/rastervision_pytorch_backend/
 COPY ./rastervision_gdal_vsi/ /opt/src/rastervision_gdal_vsi/
 
-# This gets rid of the following error when importing cv2 on arm64.
-# We cannot use the ENV directive since it cannot be used conditionally.
-# See https://github.com/opencv/opencv/issues/14884
-# ImportError: /lib/aarch64-linux-gnu/libGLdispatch.so.0: cannot allocate memory in static TLS block
-RUN if [${TARGETARCH} == "arm64"]; \
-    then echo "export LD_PRELOAD=/lib/$(cat /root/linux_arch)-linux-gnu/libGLdispatch.so.0:$LD_PRELOAD" >> /root/.bashrc; fi
-
 CMD ["bash"]
diff --git a/rastervision_gdal_vsi/requirements.txt b/rastervision_gdal_vsi/requirements.txt
@@ -1,2 +1,2 @@
 rastervision_pipeline==0.21
-gdal==3.6.3
+gdal>=3.4.1,<=3.6.3
diff --git a/scripts/cibuild b/scripts/cibuild
@@ -17,10 +17,13 @@ if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
     if [[ "${1:-}" == "--help" ]]; then
         usage
     else
-        docker build \
+        DOCKER_BUILDKIT=1 docker build \
+            --build-arg BUILDKIT_INLINE_CACHE=1 \
+            --build-arg BUILD_TYPE=fullbuild \
             --platform linux/amd64 \
             --build-arg CUDA_VERSION="12.1.1" \
             --build-arg UBUNTU_VERSION="22.04" \
+            --cache-from=quay.io/azavea/raster-vision:pytorch-latest \
             -t "raster-vision-${IMAGE_TYPE}" \
             -f Dockerfile .