Skip to content

Commit

Permalink
[CI] Add nightly CI job to test against dev version of deps (#10351)
Browse files Browse the repository at this point in the history
* [CI] Add nightly CI job to test against dev version of deps

* Update build-containers.sh

* Add build step

* Wait for build artifact

* Try pinning dask

* Address reviewers' comments

* Fix unbound variable error

* Specify dev version exactly

* Pin dask=2024.1.1
  • Loading branch information
hcho3 committed Jun 4, 2024
1 parent eb6622f commit 4057f86
Show file tree
Hide file tree
Showing 14 changed files with 120 additions and 18 deletions.
6 changes: 3 additions & 3 deletions tests/buildkite/build-containers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ case "${container}" in
cpu)
;;

gpu)
gpu|gpu_build_centos7)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
;;

gpu_build_centos7)
gpu_dev_ver)
BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION"
BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$DEV_RAPIDS_VERSION"
;;

jvm_gpu_build)
Expand Down
1 change: 1 addition & 0 deletions tests/buildkite/conftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set -x
CUDA_VERSION=11.8.0
NCCL_VERSION=2.16.5-1
RAPIDS_VERSION=24.04
DEV_RAPIDS_VERSION=24.06
SPARK_VERSION=3.4.0
JDK_VERSION=8
R_VERSION=4.3.2
Expand Down
37 changes: 37 additions & 0 deletions tests/buildkite/pipeline-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Nightly CI pipeline, to test against dev versions of dependencies
# Flow: build the containers -> build XGBoost with CUDA -> run the Python GPU
# test suites (single GPU and 4 GPUs).

env:
# Docker cache registry (ECR) account ID and region.
# NOTE(review): presumably consumed by the container build scripts; the
# consumer is not part of this file - confirm.
DOCKER_CACHE_ECR_ID: "492475357299"
DOCKER_CACHE_ECR_REGION: "us-west-2"
DISABLE_RELEASE: "1"
# (describes DISABLE_RELEASE above) Skip uploading artifacts to S3 bucket
# Also, don't build all CUDA archs; just build sm_75
USE_DEPS_DEV_VER: "1"
# (describes USE_DEPS_DEV_VER above) Use dev versions of RAPIDS and other dependencies.
# tests/buildkite/test-python-gpu.sh checks this variable: when it is set, the
# tests run in the gpu_dev_ver container with DEV_RAPIDS_VERSION instead of
# the gpu container with RAPIDS_VERSION.
steps:
#### -------- CONTAINER BUILD --------
# Builds the two containers used by this pipeline: gpu_build_centos7 and
# gpu_dev_ver.
- label: ":docker: Build containers"
commands:
- "tests/buildkite/build-containers.sh gpu_build_centos7"
- "tests/buildkite/build-containers.sh gpu_dev_ver"
key: build-containers
agents:
queue: linux-amd64-cpu
# Buildkite "wait" step: the steps below start only after the steps above finish.
- wait

#### -------- BUILD --------
# Produces the build that the test steps below run against.
- label: ":console: Build CUDA"
command: "tests/buildkite/build-cuda.sh"
key: build-cuda
agents:
queue: linux-amd64-cpu
# Wait for the build to finish before starting the tests.
- wait
#### -------- TEST --------
# No "wait" step separates the two test steps; each targets its own agent queue.
- label: ":console: Test Python package, single GPU"
command: "tests/buildkite/test-python-gpu.sh gpu"
key: test-python-gpu
agents:
queue: linux-amd64-gpu
- label: ":console: Test Python package, 4 GPUs"
command: "tests/buildkite/test-python-gpu.sh mgpu"
key: test-python-mgpu
agents:
queue: linux-amd64-mgpu
15 changes: 12 additions & 3 deletions tests/buildkite/test-python-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,19 @@ chmod +x build/testxgboost
# Allocate extra space in /dev/shm to enable NCCL
export CI_DOCKER_EXTRA_PARAMS_INIT='--shm-size=4g'

command_wrapper="tests/ci_build/ci_build.sh gpu --use-gpus --build-arg "`
if [[ -z "${USE_DEPS_DEV_VER-}" ]]
then
container_tag='gpu'
rapids_version=${RAPIDS_VERSION}
else
container_tag='gpu_dev_ver'
rapids_version=${DEV_RAPIDS_VERSION}
fi

command_wrapper="tests/ci_build/ci_build.sh ${container_tag} --use-gpus --build-arg "`
`"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "`
`"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"
`"RAPIDS_VERSION_ARG=${rapids_version} --build-arg "`
`"NCCL_VERSION_ARG=$NCCL_VERSION"

# Run specified test suite
case "$suite" in
Expand Down
3 changes: 3 additions & 0 deletions tests/buildkite/update-rapids.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ set -euo pipefail

# Ask GitHub for the name of the latest cuML release and keep only the
# "<major>.<minor>" prefix (e.g. "v24.04.00" -> "24.04").
LATEST_RAPIDS_VERSION=$(gh api repos/rapidsai/cuml/releases/latest --jq '.name' | sed -e 's/^v\([[:digit:]]\+\.[[:digit:]]\+\).*/\1/')

# The sed expression above passes unrecognized names through unchanged, so
# validate the result before it is used in date arithmetic and substituted
# into conftest.sh.
version_re='^[[:digit:]]+\.[[:digit:]]+$'
if [[ ! "$LATEST_RAPIDS_VERSION" =~ $version_re ]]; then
  echo "Unexpected RAPIDS release name: '${LATEST_RAPIDS_VERSION}'" >&2
  exit 1
fi
echo "LATEST_RAPIDS_VERSION = $LATEST_RAPIDS_VERSION"

# The dev version is the release that follows the latest one: interpret
# YY.MM as the date 20YY-MM-01, add two months, and format back to YY.MM
# (e.g. 24.04 -> 2024-06-01 -> 24.06). Relies on GNU date's -d option.
DEV_RAPIDS_VERSION=$(date +%Y-%m-%d -d "20${LATEST_RAPIDS_VERSION//./-}-01 + 2 month" | cut -c3-7 | tr - .)
echo "DEV_RAPIDS_VERSION = $DEV_RAPIDS_VERSION"

# Directory containing this script. '&&' (rather than ';') ensures a failed cd
# does not silently yield the caller's working directory.
PARENT_PATH=$( cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P )

# Rewrite both version pins in conftest.sh in place. The '^' anchor keeps the
# RAPIDS_VERSION pattern from also matching the DEV_RAPIDS_VERSION line.
# The path is quoted so a checkout location containing spaces still works.
sed -i "s/^RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/RAPIDS_VERSION=${LATEST_RAPIDS_VERSION}/" "$PARENT_PATH/conftest.sh"
sed -i "s/^DEV_RAPIDS_VERSION=[[:digit:]]\+\.[[:digit:]]\+/DEV_RAPIDS_VERSION=${DEV_RAPIDS_VERSION}/" "$PARENT_PATH/conftest.sh"
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.aarch64
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ RUN \
yum update -y && \
yum install -y devtoolset-9 && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-aarch64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-aarch64.sh && \
bash conda.sh -b -p /opt/mambaforge

ENV PATH=/opt/mambaforge/bin:$PATH
Expand All @@ -23,7 +23,7 @@ ENV GOSU_VERSION 1.10
COPY conda_env/aarch64_test.yml /scripts/
RUN mamba create -n aarch64_test && \
mamba env update -n aarch64_test --file=/scripts/aarch64_test.yml && \
mamba clean --all
mamba clean --all --yes

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
Expand Down
4 changes: 2 additions & 2 deletions tests/ci_build/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN \
apt-get update && \
apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-9 g++-9 openjdk-8-jdk-headless && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge

ENV PATH=/opt/mambaforge/bin:$PATH
Expand All @@ -36,7 +36,7 @@ RUN git clone -b v1.49.1 https://github.com/grpc/grpc.git \
COPY conda_env/linux_cpu_test.yml /scripts/
RUN mamba create -n linux_cpu_test && \
mamba env update -n linux_cpu_test --file=/scripts/linux_cpu_test.yml && \
mamba clean --all && \
mamba clean --all --yes && \
conda run --no-capture-output -n linux_cpu_test pip install buildkite-test-collector

# Install lightweight sudo (not bound to TTY)
Expand Down
6 changes: 3 additions & 3 deletions tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@ RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge

ENV PATH=/opt/mambaforge/bin:$PATH

# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai -c nvidia -c conda-forge \
mamba create -y -n gpu_test -c rapidsai -c conda-forge -c nvidia \
python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
"nccl>=${NCCL_SHORT_VER}" \
dask=2024.1.1 \
dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all && \
mamba clean --all --yes && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

ENV GOSU_VERSION 1.10
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu_build_centos7
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN \
yum -y update && \
yum install -y tar unzip wget xz git which ninja-build devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install awscli && \
# CMake
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu_build_r_centos7
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RUN \

run \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/python -m pip install auditwheel awscli && \
# CMake
Expand Down
52 changes: 52 additions & 0 deletions tests/ci_build/Dockerfile.gpu_dev_ver
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Container to test XGBoost against dev versions of dependencies
#
# Build arguments:
#   CUDA_VERSION_ARG   - CUDA version; selects the nvidia/cuda base image tag
#                        and the cudatoolkit package version
#   RAPIDS_VERSION_ARG - version prefix used for cudf, rmm, dask-cuda, dask-cudf
#   NCCL_VERSION_ARG   - NCCL version; only the part before the first '-' is
#                        used, as a lower bound for the nccl package

# Declared before FROM so it can be used in the base image tag. An ARG declared
# before FROM is not visible inside the build stage, hence the second
# declaration after FROM.
ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu22.04
ARG CUDA_VERSION_ARG
ARG RAPIDS_VERSION_ARG
# RAPIDS_VERSION_ARG should be the first 4 digits of the dev version (e.g. 24.06)
ARG NCCL_VERSION_ARG

# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell

# Install all basic requirements
# Fetches the NVIDIA CUDA repository signing key, installs the system packages,
# then installs Miniforge under /opt/mambaforge.
RUN \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub && \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge

# Put the conda/mamba executables installed above on PATH
ENV PATH=/opt/mambaforge/bin:$PATH

# Create new Conda environment with dev versions of cuDF, Dask, and cuPy
# - NCCL_SHORT_VER is NCCL_VERSION_ARG truncated at the first '-'
#   (e.g. 2.16.5-1 -> 2.16.5).
# - Packages come from the rapidsai-nightly channel, and the RAPIDS packages are
#   matched as "<RAPIDS_VERSION_ARG>.*".
# - dask is pinned to 2024.1.1.
# - buildkite-test-collector is installed with pip inside the new environment.
RUN \
export NCCL_SHORT_VER=$(echo "$NCCL_VERSION_ARG" | cut -d "-" -f 1) && \
mamba create -y -n gpu_test -c rapidsai-nightly -c conda-forge -c nvidia \
python=3.10 "cudf=$RAPIDS_VERSION_ARG.*" "rmm=$RAPIDS_VERSION_ARG.*" cudatoolkit=$CUDA_VERSION_ARG \
"nccl>=${NCCL_SHORT_VER}" \
dask=2024.1.1 \
"dask-cuda=$RAPIDS_VERSION_ARG.*" "dask-cudf=$RAPIDS_VERSION_ARG.*" cupy \
numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
"pyspark>=3.4.0" cloudpickle cuda-python && \
mamba clean --all --yes && \
conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

# GOSU_VERSION selects the gosu release downloaded below; JAVA_HOME points at
# the JDK installed by the openjdk-8-jdk-headless package above.
ENV GOSU_VERSION 1.10
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/

# Install lightweight sudo (not bound to TTY)
# NOTE(review): the final `gosu nobody true` looks like a smoke test that the
# downloaded binary runs - confirm.
RUN set -ex; \
wget -nv -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.jvm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ RUN \
devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ \
devtoolset-9-runtime devtoolset-9-libstdc++-devel && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
# CMake
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.jvm_cross
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN \
apt-get update && \
apt-get install -y tar unzip wget openjdk-$JDK_VERSION-jdk libgomp1 && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
/opt/mambaforge/bin/pip install awscli && \
# Maven
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.jvm_gpu_build
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN \
yum -y update && \
yum install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel devtoolset-9-gcc devtoolset-9-binutils devtoolset-9-gcc-c++ && \
# Python
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/22.11.1-2/Mambaforge-22.11.1-2-Linux-x86_64.sh && \
wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-x86_64.sh && \
bash conda.sh -b -p /opt/mambaforge && \
# CMake
wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \
Expand Down

0 comments on commit 4057f86

Please sign in to comment.