From ef4170e7b7838f237be6b643616f79e595820f06 Mon Sep 17 00:00:00 2001 From: Alex M Date: Tue, 7 Apr 2026 07:35:48 +0000 Subject: [PATCH] refactor: unify Dockerfiles into multi-stage build and update README Merge Dockerfile.dev and Dockerfile.prod into a single multi-stage Dockerfile with dev/prod targets. Extract entrypoint scripts to scripts/. Update README to reflect metrics enabled by default and add observability to the features list. --- .devcontainer/devcontainer.json | 3 +- .github/workflows/release.yml | 3 +- Dockerfile.prod => Dockerfile | 59 +++++++++----------- Dockerfile.dev | 99 --------------------------------- README.md | 6 +- docs/development.md | 4 +- scripts/start.sh | 16 ++++++ scripts/start_ray.sh | 38 +++++++++++++ 8 files changed, 90 insertions(+), 138 deletions(-) rename Dockerfile.prod => Dockerfile (69%) delete mode 100644 Dockerfile.dev create mode 100755 scripts/start.sh create mode 100755 scripts/start_ray.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 121e13a..5764a43 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,7 +1,8 @@ { "name": "yasha", "build": { - "dockerfile": "../Dockerfile.dev", + "dockerfile": "../Dockerfile", + "target": "dev", "context": ".." }, "features": { diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3339712..77e82a7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,8 @@ jobs: uses: docker/build-push-action@v6 with: context: . 
- file: Dockerfile.prod + file: Dockerfile + target: prod push: true build-args: | CUDA_VERSION=13.0.2 diff --git a/Dockerfile.prod b/Dockerfile similarity index 69% rename from Dockerfile.prod rename to Dockerfile index 9a9e2fc..74e0296 100644 --- a/Dockerfile.prod +++ b/Dockerfile @@ -41,9 +41,6 @@ WORKDIR /yasha ADD ./pyproject.toml pyproject.toml ADD ./README.md README.md ADD ./uv.lock uv.lock -ADD ./start.py start.py -ADD ./yasha yasha -ADD ./config config ADD ./plugins plugins ENV UV_PROJECT_ENVIRONMENT=/.venv @@ -63,36 +60,32 @@ RUN uv python install ${PYTHON_VERSION} ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH" +# --------------------------------------------------------------------------- +# Development target +# --------------------------------------------------------------------------- +FROM base AS dev + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --extra dev + +ADD ./scripts/start_ray.sh /yasha/scripts/start_ray.sh +RUN chmod +x /yasha/scripts/start_ray.sh + +CMD ["/bin/bash"] + +# --------------------------------------------------------------------------- +# Production target +# --------------------------------------------------------------------------- +FROM base AS prod + +ADD ./start.py start.py +ADD ./yasha yasha +ADD ./config config +ADD ./scripts scripts + RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --locked --no-install-project -WORKDIR / -COPY < /etc/apt/sources.list.d/cuda.list - -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends \ - build-essential \ - curl \ - espeak-ng \ - git - -RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && CUDA_MAJOR_VERSION=$(echo $CUDA_VERSION | cut -d. 
-f1) && \ - apt update -y && \ - apt install -y --no-install-recommends \ - build-essential \ - cuda-nvcc-${CUDA_VERSION_DASH} \ - cuda-cudart-${CUDA_VERSION_DASH} \ - cuda-nvrtc-${CUDA_VERSION_DASH} \ - cuda-cuobjdump-${CUDA_VERSION_DASH} \ - libcurand-dev-${CUDA_VERSION_DASH} \ - libcublas-${CUDA_VERSION_DASH} \ - cudnn9-cuda-${CUDA_MAJOR_VERSION} - -# Install uv -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -ENV UV_LINK_MODE=copy - -WORKDIR /yasha - -# Copy dependency manifests and workspace members so uv can resolve and -# pre-install packages during the image build. At runtime the dev container -# bind-mounts the full host repo over /yasha, shadowing everything below. -ADD ./pyproject.toml pyproject.toml -ADD ./README.md README.md -ADD ./uv.lock uv.lock -ADD ./plugins plugins - -ENV UV_PROJECT_ENVIRONMENT=/.venv -ENV VIRTUAL_ENV=/.venv -ENV YASHA_CACHE_DIR=/yasha/.cache/models -ENV RAY_REDIS_PORT=6379 -ENV RAY_CLUSTER_ADDRESS=0.0.0.0 -ENV RAY_HEAD_CPU_NUM=2 -ENV RAY_HEAD_GPU_NUM=1 -ENV YASHA_USE_EXISTING_RAY_CLUSTER=false -ENV YASHA_METRICS=true -ENV RAY_METRICS_EXPORT_PORT=8079 -RUN uv venv - -ARG PYTHON_VERSION -RUN uv python install ${PYTHON_VERSION} - -ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH" - -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --locked --no-install-project --extra dev - -WORKDIR / -COPY < **Why the extra steps?** The Dev Container overrides the image's default `CMD` (which normally runs `start.sh` to sync deps and start Ray). Inside a Dev Container you need to run these steps manually. 
The Dev Container automatically: -- Builds the dev image from `Dockerfile.dev` +- Builds the dev image from `Dockerfile` (target: `dev`) - Bind-mounts the repo to `/yasha` for live editing - Forwards ports `8000` (API) and `8265` (Ray Dashboard) - Installs extensions: Ruff, Python, Pyright, and Claude Code @@ -76,7 +76,7 @@ If you prefer not to use Dev Containers, you can build and run the dev image dir ### Building the dev image ```bash -docker build -t yasha_dev -f Dockerfile.dev . +docker build -t yasha_dev --target dev . ``` ### Running with live source mounting diff --git a/scripts/start.sh b/scripts/start.sh new file mode 100755 index 0000000..d9a24b8 --- /dev/null +++ b/scripts/start.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +EXTRAS="" +if [ -n "${YASHA_PLUGINS}" ]; then + for plugin in $(echo "${YASHA_PLUGINS}" | tr ',' ' '); do + EXTRAS="$EXTRAS --extra $plugin" + done +fi +uv sync --project /yasha --locked $EXTRAS + +if [ "${YASHA_USE_EXISTING_RAY_CLUSTER}" != "true" ]; then + /yasha/scripts/start_ray.sh --num-cpus "${RAY_HEAD_CPU_NUM}" --num-gpus "${RAY_HEAD_GPU_NUM}" +fi + +cd /yasha && uv run start.py diff --git a/scripts/start_ray.sh b/scripts/start_ray.sh new file mode 100755 index 0000000..fdba72b --- /dev/null +++ b/scripts/start_ray.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +usage() { + echo "Usage: start_ray.sh --num-cpus <n> --num-gpus <n> [--enable-metrics <true|false>]" + exit 1 +} + +ENABLE_METRICS="true" + +while [[ $# -gt 0 ]]; do + case "$1" in + --num-cpus) NUM_CPUS="$2"; shift 2 ;; + --num-gpus) NUM_GPUS="$2"; shift 2 ;; + --enable-metrics) ENABLE_METRICS="$2"; shift 2 ;; + *) usage ;; + esac +done + +[ -z "${NUM_CPUS}" ] && usage +[ -z "${NUM_GPUS}" ] && usage + +METRICS_FLAG="" +if [ "${ENABLE_METRICS}" = "true" ]; then + METRICS_FLAG="--metrics-export-port=${RAY_METRICS_EXPORT_PORT:-8079}" +fi + +ray start --head \ + --dashboard-host=0.0.0.0 \ + --num-cpus="${NUM_CPUS}" \ + --num-gpus="${NUM_GPUS}" \ + --disable-usage-stats \ + ${METRICS_FLAG} + +if !
ray status; then + echo "ray cluster failed to start" + exit 1 +fi