From 746b690e026e652f800f4dd41c3dc3d2495d7ba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20L=C3=B3pez=20Luna?= Date: Thu, 30 Apr 2026 13:36:34 +0200 Subject: [PATCH] feat: add MUSA and OpenVINO support using upstream llama.cpp images Add musa and openvino as new LLAMA_SERVER_VARIANT options, leveraging the official ghcr.io/ggml-org/llama.cpp server-musa and server-openvino images. Changes: - scripts/resolve-llama-upstream-image.sh: map musa -> server-musa, openvino -> server-openvino - Makefile: add docker-build-musa, docker-run-musa, docker-build-openvino, docker-run-openvino targets; restrict both to linux/amd64; update help - .github/workflows/release.yml: add MUSA and OpenVINO image build steps with tags docker/model-runner:{tag}-musa and docker/model-runner:{tag}-openvino --- .github/workflows/release.yml | 46 +++++++++++++++++++++++++ Makefile | 33 ++++++++++++++++-- scripts/resolve-llama-upstream-image.sh | 8 +++++ 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 43471a18..ae704e74 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -261,6 +261,14 @@ jobs: echo "docker/model-runner:$RELEASE_TAG-sglang-cuda" >> "$GITHUB_OUTPUT" echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT" echo 'EOF' >> "$GITHUB_OUTPUT" + echo "musa<<EOF" >> "$GITHUB_OUTPUT" + echo "docker/model-runner:$RELEASE_TAG-musa" >> "$GITHUB_OUTPUT" + echo "docker/model-runner:latest-musa" >> "$GITHUB_OUTPUT" + echo 'EOF' >> "$GITHUB_OUTPUT" + echo "openvino<<EOF" >> "$GITHUB_OUTPUT" + echo "docker/model-runner:$RELEASE_TAG-openvino" >> "$GITHUB_OUTPUT" + echo "docker/model-runner:latest-openvino" >> "$GITHUB_OUTPUT" + echo 'EOF' >> "$GITHUB_OUTPUT" echo "rocm<<EOF" >> "$GITHUB_OUTPUT" echo "docker/model-runner:$RELEASE_TAG-rocm" >> "$GITHUB_OUTPUT" echo "docker/model-runner:latest-rocm" >> "$GITHUB_OUTPUT" @@ -288,10 +296,16 @@ jobs: "$LLAMA_SERVER_VERSION" cuda) ROCM_IMAGE=$(bash 
scripts/resolve-llama-upstream-image.sh \ "$LLAMA_SERVER_VERSION" rocm) + MUSA_IMAGE=$(bash scripts/resolve-llama-upstream-image.sh \ + "$LLAMA_SERVER_VERSION" musa) + OPENVINO_IMAGE=$(bash scripts/resolve-llama-upstream-image.sh \ + "$LLAMA_SERVER_VERSION" openvino) echo "cpu=$CPU_IMAGE" >> "$GITHUB_OUTPUT" echo "cuda=$CUDA_IMAGE" >> "$GITHUB_OUTPUT" echo "rocm=$ROCM_IMAGE" >> "$GITHUB_OUTPUT" + echo "musa=$MUSA_IMAGE" >> "$GITHUB_OUTPUT" + echo "openvino=$OPENVINO_IMAGE" >> "$GITHUB_OUTPUT" - name: Log in to DockerHub uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 @@ -388,6 +402,38 @@ jobs: sbom: true provenance: mode=max tags: ${{ steps.tags.outputs.rocm }} + - name: Build MUSA image + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f + with: + file: Dockerfile + target: final-llamacpp + platforms: linux/amd64 + build-args: | + "LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}" + "LLAMA_SERVER_VARIANT=musa" + "LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.musa }}" + "VERSION=${{ env.RELEASE_TAG }}" + push: true + sbom: true + provenance: mode=max + tags: ${{ steps.tags.outputs.musa }} + + - name: Build OpenVINO image + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f + with: + file: Dockerfile + target: final-llamacpp + platforms: linux/amd64 + build-args: | + "LLAMA_SERVER_VERSION=${{ env.LLAMA_SERVER_VERSION }}" + "LLAMA_SERVER_VARIANT=openvino" + "LLAMA_UPSTREAM_IMAGE=${{ steps.llama-images.outputs.openvino }}" + "VERSION=${{ env.RELEASE_TAG }}" + push: true + sbom: true + provenance: mode=max + tags: ${{ steps.tags.outputs.openvino }} + # --------------------------------------------------------------------------- # Release CLI for Docker Desktop — build, sign & push CLI + Desktop module image diff --git a/Makefile b/Makefile index 86f8aa29..8e4bafdc 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,8 @@ LLAMA_UPSTREAM_IMAGE ?= $(shell \ DOCKER_IMAGE := docker/model-runner:latest 
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang +DOCKER_IMAGE_MUSA := docker/model-runner:latest-musa +DOCKER_IMAGE_OPENVINO := docker/model-runner:latest-openvino DOCKER_TARGET ?= final-llamacpp PORT := 8080 LLAMA_ARGS ?= @@ -19,7 +21,7 @@ define check-llama-image $(if $(LLAMA_UPSTREAM_IMAGE),,$(error Failed to resolve llama.cpp upstream image. Check LLAMA_SERVER_VERSION and LLAMA_SERVER_VARIANT or set LLAMA_UPSTREAM_IMAGE directly.)) endef -ifeq ($(LLAMA_SERVER_VARIANT),rocm) +ifneq (,$(filter $(LLAMA_SERVER_VARIANT),rocm musa openvino)) DOCKER_BUILD_PLATFORMS := linux/amd64 else DOCKER_BUILD_PLATFORMS := linux/amd64,linux/arm64 @@ -42,6 +44,7 @@ DOCKER_BUILD_COMMON_ARGS = \ .PHONY: validate validate-versions validate-all lint help .PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl .PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang +.PHONY: docker-build-musa docker-run-musa docker-build-openvino docker-run-openvino .PHONY: test-docker-ce-installation .PHONY: vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean .PHONY: diffusers-build diffusers-install diffusers-dev diffusers-clean @@ -202,6 +205,28 @@ docker-build-sglang: docker-run-sglang: docker-build-sglang @$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_SGLANG) +# Build MUSA Docker image +docker-build-musa: + @$(MAKE) docker-build \ + DOCKER_TARGET=final-llamacpp \ + DOCKER_IMAGE=$(DOCKER_IMAGE_MUSA) \ + LLAMA_SERVER_VARIANT=musa + +# Run MUSA Docker container with TCP port access and mounted model storage +docker-run-musa: docker-build-musa + @$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_MUSA) + +# Build OpenVINO Docker image +docker-build-openvino: + @$(MAKE) docker-build \ + DOCKER_TARGET=final-llamacpp \ + DOCKER_IMAGE=$(DOCKER_IMAGE_OPENVINO) \ + LLAMA_SERVER_VARIANT=openvino + +# Run OpenVINO Docker container with TCP port access and mounted model 
storage +docker-run-openvino: docker-build-openvino + @$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_OPENVINO) + # Common implementation for running Docker container docker-run-impl: @echo "" @@ -381,6 +406,10 @@ help: @echo " docker-run-vllm - Run vLLM Docker container" @echo " docker-build-sglang - Build SGLang Docker image" @echo " docker-run-sglang - Run SGLang Docker container" + @echo " docker-build-musa - Build MUSA Docker image" + @echo " docker-run-musa - Run MUSA Docker container" + @echo " docker-build-openvino - Build OpenVINO Docker image" + @echo " docker-run-openvino - Run OpenVINO Docker container" @echo " vllm-metal-build - Build vllm-metal tarball locally (macOS ARM64)" @echo " vllm-metal-install - Install vllm-metal from local tarball" @echo " vllm-metal-dev - Install vllm-metal from local source (editable)" @@ -394,7 +423,7 @@ help: @echo "Backend configuration options:" @echo " LLAMA_ARGS - Arguments for llama.cpp (e.g., \"--verbose --jinja -ngl 999 --ctx-size 2048\")" @echo " LLAMA_SERVER_VERSION - Upstream llama.cpp version (latest or bNNNN)" - @echo " LLAMA_SERVER_VARIANT - Linux backend flavor (cpu, cuda, or rocm)" + @echo " LLAMA_SERVER_VARIANT - Linux backend flavor (cpu, cuda, musa, openvino, or rocm)" @echo " LLAMA_UPSTREAM_IMAGE - Override the resolved upstream image directly" @echo " LOCAL_LLAMA - Use local llama.cpp build from llamacpp/install/bin (set to 1 to enable)" @echo "" diff --git a/scripts/resolve-llama-upstream-image.sh b/scripts/resolve-llama-upstream-image.sh index a9cc9e46..d5cf3227 100755 --- a/scripts/resolve-llama-upstream-image.sh +++ b/scripts/resolve-llama-upstream-image.sh @@ -34,6 +34,8 @@ Supported versions: Supported variants: - cpu - cuda + - musa + - openvino - rocm EOF exit 1 @@ -50,6 +52,12 @@ resolve_tag_base() { cuda) printf '%s\n' 'server-cuda13' ;; + musa) + printf '%s\n' 'server-musa' + ;; + openvino) + printf '%s\n' 'server-openvino' + ;; rocm) printf '%s\n' 'server-rocm' ;;