Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
dffba2a
build: introduce dynamo base container and remove current Dockerfile.…
nv-tusharma Jul 31, 2025
c351d5d
minor dockerfile fixes + use github dockerfile in premerge
nv-tusharma Jul 31, 2025
e5ddaba
Use dev target for dynamo build
nv-tusharma Jul 31, 2025
0bb8549
Add NATS + ETCD installation to dynamo base
nv-tusharma Jul 31, 2025
a926bca
remove rust build
nv-anants Jul 31, 2025
9080bab
Add back nats/etcd
nv-anants Jul 31, 2025
3666c30
Add back protobuf
nv-anants Jul 31, 2025
137853f
Remove Dockerfile.none from /containers
nv-tusharma Jul 31, 2025
88caa6a
Install essential python dependencies for pytest testing
nv-tusharma Jul 31, 2025
76a629e
add nixl
nv-anants Jul 31, 2025
877ed68
move installs to last
nv-anants Jul 31, 2025
86ac6e5
update
nv-anants Aug 1, 2025
14a88a7
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 1, 2025
11f538b
Revert back to whl install
nv-anants Aug 1, 2025
3e6dc7e
improvements
nv-anants Aug 1, 2025
725804e
ADd integration tests
nv-anants Aug 1, 2025
5329473
docker compose
nv-anants Aug 1, 2025
0365d5c
add tests
nv-anants Aug 1, 2025
d16334a
skip
nv-anants Aug 1, 2025
5bf9bb6
Merge branch 'main' into dynamo-base-container-dev
nv-anants Aug 1, 2025
e88d484
Remove sleep 15 to test if docker compose is a blocking op
nv-tusharma Aug 1, 2025
5f65476
minor syntax fix for NIXL install
nv-tusharma Aug 1, 2025
afa92fc
Use dnf and minor optimizations in NIXL/UCX build to reduce steps
nv-tusharma Aug 1, 2025
289c76d
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 4, 2025
cd448ef
Rename pre-merge-python.yml to build-and-test.yml for better clarity
nv-tusharma Aug 5, 2025
45cf4b9
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 5, 2025
ee0e797
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 5, 2025
5e17d5f
Revert changes to sglang and trt llm dockerfiles
nv-tusharma Aug 6, 2025
f735136
nit:address minor comments + move pip install commands earlier to imp…
nv-tusharma Aug 6, 2025
ba701cd
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 6, 2025
057a855
Install benchmarks in the same layer as dynamo installation
nv-tusharma Aug 6, 2025
bc9d376
Revert "Merge branch 'main' into dynamo-base-container-dev"
nv-tusharma Aug 6, 2025
b671f9c
Reapply "Merge branch 'main' into dynamo-base-container-dev"
nv-tusharma Aug 6, 2025
8d88605
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 6, 2025
e9579cf
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 7, 2025
417063a
nit: introduce minor improvements and suggestions
nv-tusharma Aug 7, 2025
9a70596
Change framework CLI arg to none
nv-tusharma Aug 7, 2025
612b5bc
Mount workspace as a volume for rust + pytest checks
nv-tusharma Aug 7, 2025
d87cf7e
Pass in is_static:False into entrypoint integration test
nv-tusharma Aug 7, 2025
831e37e
nit:minor formatting fix
nv-tusharma Aug 7, 2025
3c6a9cb
Merge branch 'main' into dynamo-base-container-dev
nv-tusharma Aug 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@
.dockerignore
**/target/*
**/*safetensors
container/Dockerfile*
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ jobs:
build-test:
runs-on:
group: Fastchecker
strategy:
matrix:
framework:
- vllm
name: Build and Test - ${{ matrix.framework }}
name: Build and Test - dynamo
env:
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_dynamo
PYTEST_XML_FILE: pytest_test_report.xml
steps:
- name: Checkout repository
Expand All @@ -41,20 +37,29 @@ jobs:
- name: Define Image Tag
id: define_image_tag
run: |
echo "image_tag=dynamo:latest-${{ matrix.framework }}" >> $GITHUB_OUTPUT
echo "image_tag=dynamo:latest" >> $GITHUB_OUTPUT
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target ci_minimum --framework ${{ matrix.framework }}
- name: Run Rust checks (llm/block-manager)
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none
- name: Start services with docker-compose
working-directory: ./deploy
run: |
docker run -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager'
docker compose up -d nats-server etcd-server
- name: Run Rust checks (block-manager + integration tests)
run: |
docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture'
- name: Cleanup services
if: always()
working-directory: ./deploy
run: |
docker compose down
- name: Run pytest
env:
PYTEST_MARKS: "pre_merge or mypy"
run: |
docker run -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
- name: Copy test report from test Container
if: always()
run: |
Expand All @@ -63,7 +68,7 @@ jobs:
uses: actions/upload-artifact@v4
if: always()
with:
name: ${{ matrix.framework }}-python-test-results
name: dynamo-python-test-results
if-no-files-found: error
path: |
${{ env.PYTEST_XML_FILE }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/trigger_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ jobs:
script: |
const eventName = context.eventName;
let commit_sha = context.sha;
const workflow_id = 'pre-merge-python.yml';
const workflow_id = 'build-and-test.yml';
if (eventName === 'pull_request') {
commit_sha = context.payload.pull_request.head.sha;
}
Expand Down
273 changes: 273 additions & 0 deletions container/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Base image coordinates; both can be overridden with --build-arg.
ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base
# TODO OPS-612: NCCL hangs with the 25.03 image, so stay on 25.01 for now.
# https://github.com/ai-dynamo/dynamo/pull/1065 has the details and a
# reproducer for manually checking whether a later image tag can be used.
ARG BASE_IMAGE_TAG=25.01-cuda12.8-devel-ubuntu24.04
ARG RELEASE_BUILD=false

# Architecture selectors that let this file build for both x86 and aarch64:
#   ARCH      - package-suffix spelling          (amd64  / arm64)
#   ARCH_ALT  - Rust target / manylinux spelling (x86_64 / aarch64)
#
# The defaults below target x86/amd64, equivalent to:
#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For an arm64/aarch64 image, build with:
#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
# TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH=amd64
ARG ARCH_ALT=x86_64


##################################
########## Base Image ############
##################################

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base

# ARGs declared before the first FROM are not visible inside a stage
# until they are declared again, so pull in the ones this stage uses.
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS

# Git refs checked out for the UCX and NIXL source builds in this stage
ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=0.4.1

# NIXL locations: source checkout, install prefix, and the
# architecture-specific library and plugin directories under that prefix
ENV NIXL_SRC_DIR=/opt/nixl \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins

USER root
ARG PYTHON_VERSION=3.12

# Copy the uv and uvx binaries from the upstream uv image into /bin.
# NOTE(review): `latest` is a floating tag, so the binaries copied here can differ
# from one build to the next; consider pinning a specific uv release tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Rust toolchain locations, PATH entry for cargo binaries, and the pinned compiler version
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.87.0

WORKDIR /opt/dynamo

# Rust host triple, derived from the ARCH_ALT build arg
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu

# Download rustup-init 1.28.1 for that triple, install the pinned toolchain with
# the minimal profile, delete the installer, and make the rustup/cargo homes
# writable by all users.
# TODO OPS-591: Add SHA check back based on RUSTARCH
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" \
    && chmod +x rustup-init \
    && ./rustup-init -y --no-modify-path --profile minimal \
        --default-toolchain "${RUST_VERSION}" \
        --default-host "${RUSTARCH}" \
    && rm rustup-init \
    && chmod -R a+w "${RUSTUP_HOME}" "${CARGO_HOME}"

# System packages for the NIXL, UCX and Rust builds; the apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Tooling required to build NIXL
        autoconf \
        automake \
        cmake \
        git \
        libtool \
        meson \
        net-tools \
        ninja-build \
        pybind11-dev \
        # RDMA/verbs packages: the hpcx installer leaves these headers out,
        # and UCX requires them to find RDMA devices
        ibverbs-providers \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        rdma-core \
        # Required by the Rust build
        clang \
        libclang-dev \
        protobuf-compiler && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the NATS server from its pinned upstream .deb release.
# The package is fetched with wget and installed with `dpkg -i`; neither step
# reads or writes /var/cache/apt, so no apt cache mount is attached to this RUN.
# The .deb is removed in the same layer so it is not stored in the image.
ENV NATS_VERSION="v2.10.28"
RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \
    dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && \
    rm nats-server-${NATS_VERSION}-${ARCH}.deb

# etcd: unpack the pinned release tarball into the /usr/local/bin/etcd directory
# (top-level archive directory stripped) and put that directory on PATH.
ENV ETCD_VERSION="v3.5.21"
RUN mkdir -p /usr/local/bin/etcd \
    && wget --tries=3 --waitretry=5 \
        "https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz" \
        -O /tmp/etcd.tar.gz \
    && tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 \
    && rm /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH

### UCX EFA Setup ###
# Remove any existing UCX trees, then shallow-clone UCX at $NIXL_UCX_REF, build it
# into /usr/local/ucx, register its library directories with the dynamic linker,
# and delete the source checkout in the same layer.
RUN rm -rf /opt/hpcx/ucx /usr/local/ucx \
    && echo "Building UCX with reference $NIXL_UCX_REF" \
    && cd /usr/local/src \
    && git clone --depth 1 --branch "${NIXL_UCX_REF}" https://github.com/openucx/ucx.git \
    && cd ucx \
    && ./autogen.sh \
    && ./configure \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-efa \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --enable-mt \
    && make -j"$(nproc)" \
    && make -j"$(nproc)" install-strip \
    && printf '%s\n' /usr/local/ucx/lib /usr/local/ucx/lib/ucx > /etc/ld.so.conf.d/ucx.conf \
    && ldconfig \
    && cd /usr/local/src \
    && rm -rf ucx

# UCX environment variables: prepend the system include, binary and
# pkg-config directories to their respective search paths
ENV CPATH=/usr/include:$CPATH \
    PATH=/usr/bin:$PATH \
    PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH

### NIXL SETUP ###
# Shallow-clone NIXL at ${NIXL_REF}, build and install it with meson/ninja into
# ${NIXL_PREFIX}, then register the library and plugin directories with the
# dynamic linker. On arm64 the GDS backend is disabled via a meson option.
RUN git clone --depth 1 --branch "${NIXL_REF}" "https://github.com/ai-dynamo/nixl.git" "${NIXL_SRC_DIR}" \
    && cd "${NIXL_SRC_DIR}" \
    && nixl_build_args="" \
    && if [ "$ARCH" = "arm64" ]; then nixl_build_args="-Ddisable_gds_backend=true"; fi \
    # $nixl_build_args is intentionally unquoted so an empty value adds no argument
    && meson setup build/ --buildtype=release --prefix="${NIXL_PREFIX}" $nixl_build_args \
    && ninja -C build/ -j"$(nproc)" \
    && ninja -C build/ install \
    && printf '%s\n' "$NIXL_LIB_DIR" "$NIXL_PLUGIN_DIR" > /etc/ld.so.conf.d/nixl.conf \
    && ldconfig

# Build the NIXL Python wheel into the wheelhouse; arm64 builds pass the
# setup arg that disables the GDS backend.
# TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64
RUN cd ${NIXL_SRC_DIR} && \
    if [ "$ARCH" = "arm64" ]; then \
        uv build . --out-dir /opt/dynamo/wheelhouse/nixl \
            --config-settings=setup-args="-Ddisable_gds_backend=true"; \
    else \
        uv build . --out-dir /opt/dynamo/wheelhouse/nixl; \
    fi

# Create the virtual environment with the interpreter selected by the
# PYTHON_VERSION build arg declared earlier in this stage (default 3.12),
# rather than a hard-coded version that silently ignores the arg.
# NOTE(review): other steps in this file still name 3.12 explicitly
# (python3.12-devel, the cp312 wheel glob), so they must be updated together
# with PYTHON_VERSION if a different interpreter is ever wanted.
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

# Activate the virtual environment for all later instructions and at runtime
ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

# Install the common and test requirements. Both files are bind-mounted from
# the build context for this step only instead of being copied into a layer.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    uv pip install \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt

##################################
##### Wheel Build Image ##########
##################################

# ARCH_ALT is interpolated from the global ARG declared before the first FROM.
# Global ARGs are available to every FROM instruction, so no redeclaration is
# needed (an ARG placed here would belong to the previous stage and have no
# effect on this FROM).
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder

# Default CARGO_BUILD_JOBS to 16 when the build arg is not supplied. This keeps
# cargo from running $(nproc) jobs in parallel, which might exceed the limit on
# the number of open files.
ARG CARGO_BUILD_JOBS
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
# Pass RELEASE_BUILD=true to generate wheels for Python 3.10, 3.11 and 3.12.
ARG RELEASE_BUILD

WORKDIR /opt/dynamo

# Build prerequisites; the dnf caches are cleared in the same layer
RUN dnf update -y && \
    dnf install -y \
        llvm-toolset \
        protobuf-compiler \
        python3.12-devel && \
    dnf clean all && \
    rm -rf /var/cache/dnf

# Same toolchain, venv and NIXL locations as in the base stage, plus the cargo target dir
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    VIRTUAL_ENV=/opt/dynamo/venv \
    NIXL_PREFIX=/opt/nvidia/nvda_nixl

# Reuse the Rust toolchain, NIXL install and Python venv prepared in the base stage
COPY --from=base ${RUSTUP_HOME} ${RUSTUP_HOME}
COPY --from=base ${CARGO_HOME} ${CARGO_HOME}
COPY --from=base ${NIXL_PREFIX} ${NIXL_PREFIX}
COPY --from=base ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH=${CARGO_HOME}/bin:${VIRTUAL_ENV}/bin:${PATH}

# Project metadata and workspace manifests go in their own layer, ahead of the sources
COPY pyproject.toml README.md LICENSE \
     Cargo.toml Cargo.lock rust-toolchain.toml \
     /opt/dynamo/

# Library and component sources
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/

# Build dynamo wheel
# 1. Build the top-level project wheel into /opt/dynamo/dist.
# 2. Install maturin (with the patchelf extra) and build the Python bindings
#    wheel with the block-manager feature enabled.
# 3. When RELEASE_BUILD=true, also build bindings wheels for Python 3.11 and 3.10.
# The requirement is quoted because `[patchelf]` is a glob character class to the
# shell; unquoted, it would be rewritten if a matching file name existed in the
# working directory.
RUN uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    uv pip install "maturin[patchelf]" && \
    maturin build --release --features block-manager --out /opt/dynamo/dist && \
    if [ "$RELEASE_BUILD" = "true" ]; then \
        # do not enable KVBM feature, ensure compatibility with lower glibc
        uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \
        uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \
    fi

##############################################
########## Dev entrypoint image ##############
##############################################
FROM base AS dev

# Environment for the application: install root, cargo target dir, and
# /opt/dynamo prepended to the Python module search path
ENV DYNAMO_HOME=/opt/dynamo \
    CARGO_TARGET_DIR=/opt/dynamo/target \
    PYTHONPATH=/opt/dynamo:$PYTHONPATH

WORKDIR /opt/dynamo

# Bring over the wheels and the cargo target directory produced by wheel_builder
COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --from=wheel_builder ${CARGO_TARGET_DIR} ${CARGO_TARGET_DIR}

# Also take the cargo home (cache) so dependencies are not downloaded again
COPY --from=wheel_builder ${CARGO_HOME} ${CARGO_HOME}

# The benchmarks folder is only needed for installation; it is copied temporarily
COPY benchmarks/ /opt/dynamo/benchmarks/

# Install the runtime wheel (cp312), the pure-Python dynamo wheel, the NIXL
# wheel and the benchmarks package in one step, then delete the benchmarks
# source directory.
RUN uv pip install \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
        /opt/dynamo/benchmarks \
    && rm -rf /opt/dynamo/benchmarks

# Launch banner: bind-mount the message file for this step, drop every line that
# starts with "#" followed by whitespace, save the result as ~/.launch_screen,
# and have ~/.bashrc print it.
RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen \
    && echo "cat ~/.launch_screen" >> ~/.bashrc

# Exec-form entrypoint with no default arguments
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
Loading
Loading