docker · doringeman · Mar 23, 2026 · Mar 23, 2026
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
@@ -0,0 +1,28 @@
+name: E2E Tests
+
+on:
+  workflow_dispatch:  # manual trigger
+  pull_request:
+    branches: [ main ]
+  push:
+    branches: [ main ]
+
+jobs:
+  e2e-test:
+    runs-on: macos-latest  # the Makefile help text describes the e2e target as macOS
+    timeout-minutes: 20  # job-level cap; the go test run itself is limited to 15m by the Makefile e2e target
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
+        with:
+          submodules: recursive  # llama.cpp is built from the llamacpp/native submodule
+
+      - name: Set up Go
+        uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417
+        with:
+          go-version: 1.25.8
+          cache: true
+
+      - name: Run e2e tests
+        run: make e2e  # builds llama.cpp and the binaries, checks test naming, then runs the TestE2E* tests
@@ -9,6 +9,8 @@
 jobs:
  e2e-test:
+    permissions:
+      contents: read
    runs-on: macos-latest
    timeout-minutes: 20
@@ -9,6 +9,8 @@

 jobs:
  e2e-test:
+    permissions:
+      contents: read
    runs-on: macos-latest
    timeout-minutes: 20

diff --git a/Makefile b/Makefile
@@ -23,7 +23,7 @@ DOCKER_BUILD_ARGS := \
 	-t $(DOCKER_IMAGE)
 
 # Phony targets grouped by category
-.PHONY: build build-cli build-dmr install-cli run clean test integration-tests
+.PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
 .PHONY: validate validate-all lint help
 .PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
 .PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
@@ -44,6 +44,10 @@ build-cli:
 build-dmr:
 	go build -ldflags="-s -w" -o dmr ./cmd/dmr
 
+build-llamacpp: # initialise the llamacpp/native submodule, then run the build target in llamacpp/
+	git submodule update --init llamacpp/native
+	$(MAKE) -C llamacpp build
+
 install-cli:
 	$(MAKE) -C cmd/cli install
 
@@ -82,6 +86,18 @@ integration-tests:
 	go test -v -race -count=1 -tags=integration -run "^TestIntegration" -timeout=5m ./cmd/cli/commands
 	@echo "Integration tests completed!"
 
+# Run the end-to-end suite after building llama.cpp and the binaries.
+# Before running, verify that every top-level test function in e2e/ is named
+# TestE2E* (TestMain excepted): the go test invocation below selects tests
+# with -run "^TestE2E", so a differently named test would silently never run.
+# grep -H forces the "file:" prefix that the exclusion patterns rely on; without
+# it grep omits the prefix when the glob matches a single file and every test
+# would be reported as invalid.
+e2e: build-llamacpp build
+	@echo "Running e2e tests..."
+	@echo "Checking test naming conventions..."
+	@INVALID_TESTS=$$(grep -H "^func Test" e2e/*_test.go | grep -v "^.*:func TestE2E" | grep -v "^.*:func TestMain"); \
+	if [ -n "$$INVALID_TESTS" ]; then \
+		echo "Error: Found test functions that don't start with 'TestE2E':"; \
+		echo "$$INVALID_TESTS" | sed 's/.*func \([^(]*\).*/\1/'; \
+		exit 1; \
+	fi
+	go test -v -count=1 -tags=e2e -run "^TestE2E" -timeout=15m ./e2e/
+	@echo "E2E tests completed!"
+
 test-docker-ce-installation:
 	@echo "Testing Docker CE installation..."
 	@echo "Note: This requires Docker to be running"
@@ -319,6 +335,8 @@ help:
 	@echo "  clean				- Clean build artifacts"
 	@echo "  test				- Run tests"
 	@echo "  integration-tests		- Run integration tests (requires Docker)"
+	@echo "  build-llamacpp		- Init submodule and build llama.cpp from source"
+	@echo "  e2e				- Run e2e tests (builds llamacpp + server, macOS)"
 	@echo "  test-docker-ce-installation	- Test Docker CE installation with CLI plugin"
 	@echo "  validate			- Run shellcheck validation"
 	@echo "  validate-all			- Run all CI validations locally (lint, test, shellcheck, go mod tidy)"

diff --git a/e2e/cli_test.go b/e2e/cli_test.go
@@ -0,0 +1,52 @@
+//go:build e2e
+
+package e2e
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestE2E_CLI exercises the CLI against the server started by TestMain.
+// The steps run as sequential subtests because each depends on the state
+// left by the previous one (pull → list → run → remove).
+//
+// List and Run are skipped when Pull fails: without the model they can only
+// produce secondary failures that hide the real cause. Remove always runs so
+// that whatever the pull left behind is cleaned up.
+func TestE2E_CLI(t *testing.T) {
+	// t.Run reports whether the subtest succeeded.
+	pulled := t.Run("Pull", func(t *testing.T) {
+		out, err := runCLI(t, "pull", testModel)
+		if err != nil {
+			t.Fatalf("cli pull failed: %v\noutput: %s", err, out)
+		}
+		t.Logf("pull output: %s", out)
+	})
+
+	t.Run("List", func(t *testing.T) {
+		if !pulled {
+			t.Skip("skipping: model pull failed")
+		}
+		out, err := runCLI(t, "ls")
+		if err != nil {
+			t.Fatalf("cli ls failed: %v\noutput: %s", err, out)
+		}
+
+		if !strings.Contains(out, "smollm2") {
+			t.Errorf("expected smollm2 in list output, got:\n%s", out)
+		}
+		t.Logf("ls output:\n%s", out)
+	})
+
+	t.Run("Run", func(t *testing.T) {
+		if !pulled {
+			t.Skip("skipping: model pull failed")
+		}
+		out, err := runCLI(t, "run", testModel, "Say hi in one word.")
+		if err != nil {
+			t.Fatalf("cli run failed: %v\noutput: %s", err, out)
+		}
+
+		// Only non-emptiness is checked; the model's wording is not deterministic.
+		if strings.TrimSpace(out) == "" {
+			t.Fatal("cli run produced empty output")
+		}
+		t.Logf("run output: %s", out)
+	})
+
+	t.Run("Remove", func(t *testing.T) {
+		out, err := runCLI(t, "rm", "-f", testModel)
+		if err != nil {
+			t.Fatalf("cli rm failed: %v\noutput: %s", err, out)
+		}
+		t.Logf("rm output: %s", out)
+	})
+}
diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go
@@ -0,0 +1,158 @@
+//go:build e2e
+
+// Package e2e contains end-to-end tests that build and run the full
+// model-runner stack (server + llama.cpp backend + CLI) from source.
+//
+// These tests require:
+//   - The llamacpp submodule to be initialised and built (make build-llamacpp)
+//   - A successful `make build` so that model-runner, model-cli, and dmr exist
+//
+// Run with:
+//
+//	go test -v -count=1 -tags=e2e -timeout=15m ./e2e/
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"net"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"testing"
+	"time"
+)
+
+const (
+	// testModel is small enough to pull quickly in CI.
+	testModel = "ai/smollm2:135M-Q4_0"
+
+	serverStartTimeout = 60 * time.Second // upper bound on waiting for the server's /models endpoint to return 200 during setup
+)
+
+var (
+	// serverURL is the base URL of the running model-runner instance.
+	serverURL string
+	// cliBin is the absolute path to the model-cli binary.
+	cliBin string
+)
+
+// TestMain is the package entry point under `go test`. Setup, the test run,
+// and teardown all happen inside run; its return value becomes the process
+// exit status. The work is delegated because os.Exit does not run deferred
+// functions, so the cleanup must complete before os.Exit is reached.
+func TestMain(m *testing.M) {
+	os.Exit(run(m))
+}
+
+func run(m *testing.M) int {
+	// go test sets cwd to the package directory (e2e/), so the repo root is ../
+	root, err := filepath.Abs("..")
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
+		return 1
+	}
+
+	// ── 1. Build binaries ──────────────────────────────────────────────
+	fmt.Fprintln(os.Stderr, "e2e: building server and CLI...")
+	if err := makeTarget(root, "build"); err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: make build failed: %v\n", err)
+		return 1
+	}
+
+	serverBin := filepath.Join(root, "model-runner")
+	cliBin = filepath.Join(root, "cmd", "cli", "model-cli")
+	llamaBin := filepath.Join(root, "llamacpp", "install", "bin")
+
+	for _, path := range []string{serverBin, cliBin, llamaBin} {
+		if _, err := os.Stat(path); err != nil {
+			fmt.Fprintf(os.Stderr, "e2e: not found: %s\n", path)
+			return 1
+		}
+	}
+
+	// ── 2. Start model-runner (same pattern as cmd/dmr) ────────────────
+	port, err := freePort()
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
+		return 1
+	}
+	serverURL = "http://localhost:" + strconv.Itoa(port)
+	fmt.Fprintf(os.Stderr, "e2e: starting model-runner on port %d\n", port)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	server := exec.CommandContext(ctx, serverBin)
+	server.Dir = root
+	server.Env = append(os.Environ(),
+		"MODEL_RUNNER_PORT="+strconv.Itoa(port),
+		"LLAMA_SERVER_PATH="+llamaBin,
+	)
+	server.Stdout = os.Stderr
+	server.Stderr = os.Stderr
+
+	if err := server.Start(); err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: failed to start server: %v\n", err)
+		return 1
+	}
+	defer func() {
+		cancel()
+		_ = server.Wait()
+	}()
+
+	// ── 3. Wait for health ─────────────────────────────────────────────
+	if err := waitForServer(serverURL+"/models", serverStartTimeout); err != nil {
+		fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
+		return 1
+	}
+	fmt.Fprintf(os.Stderr, "e2e: server ready at %s\n", serverURL)
+
+	// ── 4. Run tests ───────────────────────────────────────────────────
+	return m.Run()
+}
+
+// makeTarget runs `make <target>` with dir as the working directory and
+// returns the command's error. Both output streams of make are forwarded to
+// this process's stderr.
+func makeTarget(dir, target string) error {
+	mk := exec.Command("make", target)
+	mk.Dir, mk.Stdout, mk.Stderr = dir, os.Stderr, os.Stderr
+	return mk.Run()
+}
+
+// freePort returns a TCP port that was unused on 127.0.0.1 at the time of the
+// call. It listens on port 0 so the system picks the port, reads the chosen
+// number, and closes the listener on return. Because the listener is closed
+// before the caller binds the port, another process could take it in between.
+func freePort() (int, error) {
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		return 0, fmt.Errorf("finding free port: %w", err)
+	}
+	defer ln.Close()
+
+	addr := ln.Addr().(*net.TCPAddr)
+	return addr.Port, nil
+}
+
+func waitForServer(url string, timeout time.Duration) error {
+	client := &http.Client{Timeout: 2 * time.Second}
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		resp, err := client.Get(url)
+		if err == nil {
+			resp.Body.Close()
+			if resp.StatusCode == http.StatusOK {
+				return nil
+			}
+		}
+		time.Sleep(200 * time.Millisecond)
+	}
+	return fmt.Errorf("server not ready after %s", timeout)
+}
+
+// runCLI invokes the model-cli binary (cliBin) with args and returns its
+// combined stdout and stderr as a string together with the command's error.
+// The child inherits this process's environment plus MODEL_RUNNER_HOST set to
+// serverURL, so it talks to the test server. The command is bound to
+// t.Context() and is killed if it is still running when that context is
+// cancelled.
+func runCLI(t *testing.T, args ...string) (string, error) {
+	t.Helper()
+
+	env := append(os.Environ(), "MODEL_RUNNER_HOST="+serverURL)
+	cli := exec.CommandContext(t.Context(), cliBin, args...)
+	cli.Env = env
+
+	combined, err := cli.CombinedOutput()
+	return string(combined), err
+}