Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 0 additions & 57 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -147,59 +147,6 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \

RUN /opt/sglang-env/bin/python -c "import sglang; print(sglang.__version__)" > /opt/sglang-env/version

# --- Diffusers variant ---
# Extends the llamacpp stage with a Python virtual environment at
# /opt/diffusers-env that holds diffusers, its pinned dependencies, and the
# diffusers_server package copied from python/diffusers_server.
FROM llamacpp AS diffusers

# Python package versions for reproducible builds
ARG DIFFUSERS_VERSION=0.36.0
ARG TORCH_VERSION=2.9.1
ARG TRANSFORMERS_VERSION=4.57.5
ARG ACCELERATE_VERSION=1.3.0
ARG SAFETENSORS_VERSION=0.5.2
ARG HUGGINGFACE_HUB_VERSION=0.34.0
ARG BITSANDBYTES_VERSION=0.49.1
ARG FASTAPI_VERSION=0.115.12
ARG UVICORN_VERSION=0.34.1
ARG PILLOW_VERSION=11.2.1

USER root

# System packages for the Python environment. apt-get is used rather than apt
# because apt is the interactive front end and its command-line interface is
# not guaranteed to stay stable for scripted use. The package lists are removed
# in the same layer so they are not kept in the image.
RUN apt-get update && apt-get install -y \
    python3 python3-venv python3-dev \
    curl ca-certificates build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create the venv directory as root and hand ownership to modelrunner, so the
# install below can run without root privileges.
RUN mkdir -p /opt/diffusers-env && chown -R modelrunner:modelrunner /opt/diffusers-env

USER modelrunner

# Install uv and diffusers as modelrunner user
# NOTE(review): the uv installer script is downloaded unpinned at build time;
# consider pinning a uv version if fully reproducible builds are required.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/diffusers-env \
    && ~/.local/bin/uv pip install --python /opt/diffusers-env/bin/python \
    "diffusers==${DIFFUSERS_VERSION}" \
    "torch==${TORCH_VERSION}" \
    "transformers==${TRANSFORMERS_VERSION}" \
    "accelerate==${ACCELERATE_VERSION}" \
    "safetensors==${SAFETENSORS_VERSION}" \
    "huggingface_hub==${HUGGINGFACE_HUB_VERSION}" \
    "bitsandbytes==${BITSANDBYTES_VERSION}" \
    "fastapi==${FASTAPI_VERSION}" \
    "uvicorn[standard]==${UVICORN_VERSION}" \
    "pillow==${PILLOW_VERSION}"

# Copy Python server code
# The sources are staged in /tmp as root, copied into the venv's first
# site-packages directory as the diffusers_server package, chowned to
# modelrunner, and the staging copy is then removed.
USER root
COPY python/diffusers_server /tmp/diffusers_server/
RUN PYTHON_SITE_PACKAGES=$(/opt/diffusers-env/bin/python -c "import site; print(site.getsitepackages()[0])") && \
    mkdir -p "$PYTHON_SITE_PACKAGES/diffusers_server" && \
    cp -r /tmp/diffusers_server/* "$PYTHON_SITE_PACKAGES/diffusers_server/" && \
    chown -R modelrunner:modelrunner "$PYTHON_SITE_PACKAGES/diffusers_server/" && \
    rm -rf /tmp/diffusers_server
USER modelrunner

# Record the installed diffusers version in /opt/diffusers-env/version. The
# step also fails the build if diffusers cannot be imported from the venv.
RUN /opt/diffusers-env/bin/python -c "import diffusers; print(diffusers.__version__)" > /opt/diffusers-env/version

FROM llamacpp AS final-llamacpp
# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner
Expand All @@ -211,7 +158,3 @@ COPY --from=builder /app/model-runner /app/model-runner
FROM sglang AS final-sglang
# Copy the built binary from builder-sglang (without vLLM)
COPY --from=builder-sglang /app/model-runner /app/model-runner

# Final Diffusers image: the diffusers stage plus the model-runner binary.
FROM diffusers AS final-diffusers
# Copy the built binary from builder (with diffusers support)
COPY --from=builder /app/model-runner /app/model-runner
14 changes: 0 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ VLLM_VERSION ?= 0.17.0
DOCKER_IMAGE := docker/model-runner:latest
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
DOCKER_IMAGE_DIFFUSERS := docker/model-runner:latest-diffusers
DOCKER_TARGET ?= final-llamacpp
PORT := 8080
LLAMA_ARGS ?=
Expand All @@ -31,7 +30,6 @@ BUILD_DMR ?= 1
.PHONY: validate validate-all lint help
.PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
.PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
.PHONY: docker-build-diffusers docker-run-diffusers
.PHONY: test-docker-ce-installation
.PHONY: vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
.PHONY: diffusers-build diffusers-install diffusers-dev diffusers-clean
Expand Down Expand Up @@ -151,16 +149,6 @@ docker-build-sglang:
docker-run-sglang: docker-build-sglang
@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_SGLANG)

# Build Diffusers Docker image
# Delegates to the docker-build target, overriding DOCKER_TARGET with the
# final-diffusers stage and DOCKER_IMAGE with $(DOCKER_IMAGE_DIFFUSERS).
docker-build-diffusers:
@$(MAKE) docker-build \
DOCKER_TARGET=final-diffusers \
DOCKER_IMAGE=$(DOCKER_IMAGE_DIFFUSERS)

# Run Diffusers Docker container with TCP port access and mounted model storage
# Builds the image first through the docker-build-diffusers prerequisite, then
# invokes docker-run-impl with DOCKER_IMAGE set to $(DOCKER_IMAGE_DIFFUSERS).
docker-run-diffusers: docker-build-diffusers
@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_DIFFUSERS)

# Common implementation for running Docker container
docker-run-impl:
@echo ""
Expand Down Expand Up @@ -338,8 +326,6 @@ help:
@echo " docker-run-vllm - Run vLLM Docker container"
@echo " docker-build-sglang - Build SGLang Docker image"
@echo " docker-run-sglang - Run SGLang Docker container"
@echo " docker-build-diffusers - Build Diffusers Docker image"
@echo " docker-run-diffusers - Run Diffusers Docker container"
@echo " vllm-metal-build - Build vllm-metal tarball locally (macOS ARM64)"
@echo " vllm-metal-install - Install vllm-metal from local tarball"
@echo " vllm-metal-dev - Install vllm-metal from local source (editable)"
Expand Down
5 changes: 0 additions & 5 deletions cmd/cli/pkg/standalone/controller_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"os"

gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
"github.com/docker/model-runner/pkg/inference/backends/diffusers"
"github.com/docker/model-runner/pkg/inference/backends/vllm"
)

Expand Down Expand Up @@ -33,10 +32,6 @@ func controllerImageVariant(detectedGPU gpupkg.GPUSupport, backend string) strin
if backend == vllm.Name {
return "vllm-cuda"
}
// If diffusers backend is requested, return diffusers variant
if backend == diffusers.Name {
return "diffusers"
}
// Default to llama.cpp backend behavior
switch detectedGPU {
case gpupkg.GPUSupportCUDA:
Expand Down
4 changes: 0 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (

"github.com/docker/model-runner/pkg/envconfig"
"github.com/docker/model-runner/pkg/inference"
"github.com/docker/model-runner/pkg/inference/backends/diffusers"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/backends/sglang"
"github.com/docker/model-runner/pkg/inference/config"
Expand Down Expand Up @@ -132,9 +131,6 @@ func main() {
routing.BackendDef{Name: sglang.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
return sglang.New(log, mm, log.With("component", sglang.Name), nil, sglangServerPath)
}},
routing.BackendDef{Name: diffusers.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
return diffusers.New(log, mm, log.With("component", diffusers.Name), nil, diffusersServerPath)
}},
),
OnBackendError: func(name string, err error) {
log.Error("unable to initialize backend", "backend", name, "error", err)
Expand Down
1 change: 0 additions & 1 deletion scripts/docker-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
add_accelerators() {
# Add NVIDIA GPU support for CUDA variants and GPU-accelerated backends
if [[ "${DOCKER_IMAGE-}" == *"-cuda" ]] || \
[[ "${DOCKER_IMAGE-}" == *"-diffusers" ]] || \
[[ "${DOCKER_IMAGE-}" == *"-sglang" ]]; then
if docker info -f '{{range $k, $v := .Runtimes}}{{$k}}{{"\n"}}{{end}}' 2>/dev/null | grep -qx "nvidia"; then
args+=("--gpus" "all" "--runtime=nvidia")
Expand Down
Loading