diff --git a/.github/workflows/nightly-terminal-bench.yml b/.github/workflows/nightly-terminal-bench.yml
index 1d3c145f0f..4c58964c73 100644
--- a/.github/workflows/nightly-terminal-bench.yml
+++ b/.github/workflows/nightly-terminal-bench.yml
@@ -10,7 +10,7 @@ on:
   workflow_dispatch:
     inputs:
       models:
-        description: 'Models to test (comma-separated, or "all" for opus-4-7 + gpt-5.5 + google/gemini-3-pro-preview + google/gemini-3-flash-preview)'
+        description: 'Models to test (comma-separated, or "all" for opus-4-7 + gpt-5.5 + google/gemini-3-pro-preview + google/gemini-3-flash-preview + google/gemini-3.5-flash)'
         required: false
         default: "all"
         type: string
@@ -18,6 +18,11 @@ on:
         description: "Experiments to enable (comma-separated)"
         required: false
         type: string
+      mux_run_as_goal:
+        description: "Run nightly smoke/matrix tasks as strict mux CLI Goal Runs"
+        required: false
+        default: false
+        type: boolean
 
 jobs:
   # Smoke test: run chess-best-move task first to catch broken agent setup
@@ -33,6 +38,7 @@ jobs:
       env: "daytona"
       task_names: "chess-best-move"
       experiments: ${{ inputs.experiments }}
+      mux_run_as_goal: ${{ github.event_name == 'workflow_dispatch' && inputs.mux_run_as_goal || false }}
     # Keep least-privilege secret scope for reusable workflow calls.
     secrets:
       TERMINAL_BENCH_ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
@@ -58,6 +64,7 @@ jobs:
       mux_project_path: "/testbed"
       timeout: "3000"
       experiments: ${{ inputs.experiments }}
+      mux_run_as_goal: ${{ github.event_name == 'workflow_dispatch' && inputs.mux_run_as_goal || false }}
     secrets:
       TERMINAL_BENCH_ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -79,6 +86,7 @@ jobs:
       mux_project_path: "/app/src"
       timeout: "600"
       experiments: ${{ inputs.experiments }}
+      mux_run_as_goal: ${{ github.event_name == 'workflow_dispatch' && inputs.mux_run_as_goal || false }}
     secrets:
       TERMINAL_BENCH_ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -100,6 +108,7 @@ jobs:
       mux_project_path: "/app"
       timeout: "1800"
       experiments: ${{ inputs.experiments }}
+      mux_run_as_goal: ${{ github.event_name == 'workflow_dispatch' && inputs.mux_run_as_goal || false }}
     secrets:
       TERMINAL_BENCH_ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -120,7 +129,7 @@ jobs:
           INPUT_MODELS: ${{ inputs.models }}
         run: |
           if [ "$INPUT_MODELS" = "all" ] || [ -z "$INPUT_MODELS" ]; then
-            echo 'models=["anthropic/claude-opus-4-7","openai/gpt-5.5","google/gemini-3-pro-preview","google/gemini-3-flash-preview"]' >> "$GITHUB_OUTPUT"
+            echo 'models=["anthropic/claude-opus-4-7","openai/gpt-5.5","google/gemini-3-pro-preview","google/gemini-3-flash-preview","google/gemini-3.5-flash"]' >> "$GITHUB_OUTPUT"
           else
             # Convert comma-separated to JSON array
             models_json=$(echo "$INPUT_MODELS" | jq -R -s -c 'split(",") | map(gsub("^\\s+|\\s+$"; ""))')
@@ -134,7 +143,7 @@ jobs:
       matrix:
         model: ${{ fromJSON(needs.determine-models.outputs.models) }}
       fail-fast: false
-      max-parallel: 1  # Run models sequentially to stay within Daytona's 25-sandbox limit
+      max-parallel: 1 # Run models sequentially to stay within Daytona's 25-sandbox limit
     uses: ./.github/workflows/terminal-bench.yml
     with:
       model_name: ${{ matrix.model }}
@@ -144,6 +153,7 @@ jobs:
       concurrency: "48"
       env: "daytona"
       experiments: ${{ inputs.experiments }}
+      mux_run_as_goal: ${{ github.event_name == 'workflow_dispatch' && inputs.mux_run_as_goal || false }}
     secrets:
       TERMINAL_BENCH_ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
diff --git a/.github/workflows/terminal-bench.yml b/.github/workflows/terminal-bench.yml
index c79e0ea23e..7d316dec9e 100644
--- a/.github/workflows/terminal-bench.yml
+++ b/.github/workflows/terminal-bench.yml
@@ -27,7 +27,7 @@ on:
         required: false
         type: string
       extra_args:
-        description: "Additional arguments to pass to harbor"
+        description: "Additional arguments to pass to harbor (e.g., --n-tasks 5 for quick dispatch runs)"
         required: false
         type: string
       experiments:
@@ -50,10 +50,15 @@ on:
         type: string
         default: ""
       mux_run_args:
-        description: "Additional CLI flags passed to mux run (e.g., --thinking high --use-1m --budget 5.00)"
+        description: "Additional CLI flags passed to mux run (e.g., --thinking high --use-1m --budget 5.00; with goal mode, add --goal-turns/--goal-budget)"
         required: false
         type: string
         default: ""
+      mux_run_as_goal:
+        description: "Run each task instruction as a mux CLI Goal Run"
+        required: false
+        type: boolean
+        default: false
     secrets:
       # Keep the runtime env name stable while routing benchmark spend to its own key.
       TERMINAL_BENCH_ANTHROPIC_API_KEY:
@@ -92,11 +97,16 @@ on:
         required: false
         type: string
       mux_run_args:
-        description: "Additional CLI flags passed to mux run (e.g., --thinking high --use-1m)"
+        description: "Additional CLI flags passed to mux run (e.g., --thinking high --use-1m; with goal mode, add --goal-turns/--goal-budget)"
         required: false
         type: string
+      mux_run_as_goal:
+        description: "Run each task instruction as a mux CLI Goal Run"
+        required: false
+        default: false
+        type: boolean
       extra_args:
-        description: "Additional arguments to pass to harbor"
+        description: "Additional arguments to pass to harbor (e.g., --n-tasks 5 for quick dispatch runs)"
         required: false
         type: string
       experiments:
@@ -107,10 +117,6 @@ on:
         description: "Agent timeout in seconds (default: 1800 = 30 min)"
         required: false
         type: string
-      max_tasks:
-        description: "Maximum number of tasks to run (for faster iteration)"
-        required: false
-        type: string
 
 jobs:
   benchmark:
@@ -206,6 +212,7 @@ jobs:
             ${{ inputs.extra_args || '' }}
           MUX_EXPERIMENTS: ${{ inputs.experiments }}
           MUX_RUN_ARGS: ${{ inputs.mux_run_args }}
+          MUX_RUN_AS_GOAL: ${{ inputs.mux_run_as_goal && '1' || '' }}
           ANTHROPIC_API_KEY: ${{ secrets.TERMINAL_BENCH_ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
@@ -358,6 +365,7 @@ jobs:
           GCP_PROJECT_ID: mux-benchmarks
           BQ_DATASET: benchmarks
           MUX_EXPERIMENTS: ${{ inputs.experiments }}
+          MUX_RUN_AS_GOAL: ${{ inputs.mux_run_as_goal && '1' || '' }}
         run: |
           if [ -z "$GCP_SA_KEY" ]; then
             echo "GCP_SA_KEY not set, skipping BigQuery upload"
@@ -376,6 +384,7 @@ jobs:
           GCP_PROJECT_ID: mux-benchmarks
           BQ_DATASET: benchmarks
           MUX_EXPERIMENTS: ${{ inputs.experiments }}
+          MUX_RUN_AS_GOAL: ${{ inputs.mux_run_as_goal && '1' || '' }}
         run: |
           if [ -z "$GCP_SA_KEY" ]; then
             echo "GCP_SA_KEY not set, skipping BigQuery upload"
diff --git a/.mux/skills/tbench/SKILL.md b/.mux/skills/tbench/SKILL.md
index 9c7d14f7ef..6db67bb6ba 100644
--- a/.mux/skills/tbench/SKILL.md
+++ b/.mux/skills/tbench/SKILL.md
@@ -59,6 +59,7 @@ make benchmark-terminal TB_ENV=daytona TB_CONCURRENCY=48 TB_TASK_NAMES="chess-be
 - `TB_TASK_NAMES`: Space-separated task names to run (default: all tasks)
 - `TB_ARGS`: Additional arguments passed to harbor
 - `MUX_RUN_ARGS`: CLI flags passed directly to `mux run` inside the container (e.g., `--thinking high --use-1m --budget 5.00`). This is the primary mechanism for all `mux run` flags — avoids per-flag plumbing.
+- `MUX_RUN_AS_GOAL`: When set to `1`, runs each task instruction as a strict `mux run --goal` objective while still piping the instruction to stdin. Use `MUX_RUN_ARGS` for goal limits such as `--goal-turns` and `--goal-budget`.
 
 ### Timeout Handling
 
@@ -109,6 +110,22 @@ gh workflow run terminal-bench.yml \
   -f mux_run_args="--thinking high --budget 5.00"
 ```
 
+**Strict goal-mode runs:**
+
+```bash
+# Run a single task as a strict CLI Goal Run
+MUX_RUN_AS_GOAL=1 \
+MUX_RUN_ARGS="--thinking high --goal-turns 30 --goal-budget 10.00" \
+make benchmark-terminal TB_TASK_NAMES="chess-best-move"
+
+# CI dispatch
+gh workflow run terminal-bench.yml \
+  -f model_name=anthropic/claude-sonnet-4-5 \
+  -f task_names=chess-best-move \
+  -f mux_run_as_goal=true \
+  -f mux_run_args="--thinking high --goal-turns 30 --goal-budget 10.00"
+```
+
 **Local runs:**
 
 ```bash
diff --git a/benchmarks/terminal_bench/mux-run.sh b/benchmarks/terminal_bench/mux-run.sh
index 006628681c..9d87610348 100644
--- a/benchmarks/terminal_bench/mux-run.sh
+++ b/benchmarks/terminal_bench/mux-run.sh
@@ -34,6 +34,16 @@ MUX_MODEL="${MUX_MODEL:-anthropic:claude-sonnet-4-5}"
 MUX_TIMEOUT_MS="${MUX_TIMEOUT_MS:-}"
 MUX_WORKSPACE_ID="${MUX_WORKSPACE_ID:-mux-bench}"
 MUX_EXPERIMENTS="${MUX_EXPERIMENTS:-}"
+MUX_RUN_AS_GOAL="${MUX_RUN_AS_GOAL:-}"
+# Lowercase (bash 4+), then strip leading and trailing whitespace before matching.
+mux_run_as_goal_normalized="${MUX_RUN_AS_GOAL,,}"
+mux_run_as_goal_normalized="${mux_run_as_goal_normalized#"${mux_run_as_goal_normalized%%[![:space:]]*}"}"
+mux_run_as_goal_normalized="${mux_run_as_goal_normalized%"${mux_run_as_goal_normalized##*[![:space:]]}"}"
+case "${mux_run_as_goal_normalized}" in
+  "" | "0" | "false") mux_run_as_goal_enabled=0 ;;
+  "1" | "true") mux_run_as_goal_enabled=1 ;;
+  *) fatal "MUX_RUN_AS_GOAL must be one of: 1, true, 0, false" ;;
+esac
 
 resolve_project_path() {
   if [[ -n "${MUX_PROJECT_PATH}" ]]; then
@@ -80,11 +90,27 @@ if [[ -n "${MUX_EXPERIMENTS}" ]]; then
   done
 fi
 
+if [[ "${mux_run_as_goal_enabled}" == "1" ]]; then
+  log "strict mux goal mode enabled"
+  cmd+=(--goal "${instruction}")
+else
+  log "strict mux goal mode disabled"
+fi
+
+mux_run_args=()
 # Append arbitrary mux run flags (e.g., --thinking high --mode exec --use-1m --budget 5.00)
 if [[ -n "${MUX_RUN_ARGS:-}" ]]; then
-  # Word-split intentional: MUX_RUN_ARGS contains space-separated CLI flags
+  # Word-split intentional: MUX_RUN_ARGS contains space-separated CLI flags.
   # shellcheck disable=SC2206
-  cmd+=(${MUX_RUN_ARGS})
+  mux_run_args=(${MUX_RUN_ARGS})
+  if [[ "${mux_run_as_goal_enabled}" == "1" ]]; then
+    for arg in "${mux_run_args[@]}"; do
+      if [[ "${arg}" == "--goal" || "${arg}" == --goal=* ]]; then
+        fatal "MUX_RUN_ARGS must not include --goal when MUX_RUN_AS_GOAL is enabled"
+      fi
+    done
+  fi
+  cmd+=("${mux_run_args[@]}")
 fi
 
 # NOTE: Harbor only automatically collects /logs/agent on timeouts.
@@ -103,13 +129,19 @@ if [[ -n "${MUX_TIMEOUT_MS}" ]]; then
 fi
 
 # Capture output to file while streaming to terminal for token extraction.
-# Keep stderr separate so the stdout log stays valid JSONL.
-if ! printf '%s' "${instruction}" \
+# Keep stderr separate so the stdout log stays valid JSONL. Temporarily disable
+# errexit so token extraction still runs after mux returns a meaningful nonzero
+# code such as strict goal-mode exit 3.
+set +e
+printf '%s' "${instruction}" \
   | "${cmd[@]}" \
     2> >(tee "${MUX_STDERR_FILE}" >&2) \
-  | tee "${MUX_OUTPUT_FILE}"; then
-  fatal "mux agent session failed"
-fi
+  | tee "${MUX_OUTPUT_FILE}"
+pipeline_status=("${PIPESTATUS[@]}")
+set -e
+stdin_status="${pipeline_status[0]}"
+mux_status="${pipeline_status[1]}"
+tee_status="${pipeline_status[2]}"
 
 # Extract usage and cost from the JSONL output.
 # Prefer the run-complete event (emitted at end of --json run) which has aggregated
@@ -159,4 +191,19 @@ for usage in cumulative_by_msg.values():
 result["input"] += subagent_input
 result["output"] += subagent_output
 print(json.dumps(result))
-' "${MUX_OUTPUT_FILE}" > "${MUX_TOKEN_FILE}" 2>/dev/null || true
+' "${MUX_OUTPUT_FILE}" >"${MUX_TOKEN_FILE}" 2>/dev/null || true
+
+if [[ "${mux_status}" -ne 0 ]]; then
+  printf '[mux-run] ERROR: mux agent session failed (exit %s)\n' "${mux_status}" >&2
+  exit "${mux_status}"
+fi
+
+if [[ "${tee_status}" -ne 0 ]]; then
+  printf '[mux-run] ERROR: failed to capture mux stdout (exit %s)\n' "${tee_status}" >&2
+  exit "${tee_status}"
+fi
+
+if [[ "${stdin_status}" -ne 0 ]]; then
+  printf '[mux-run] ERROR: failed to send instruction to mux (exit %s)\n' "${stdin_status}" >&2
+  exit "${stdin_status}"
+fi
diff --git a/benchmarks/terminal_bench/mux_agent.py b/benchmarks/terminal_bench/mux_agent.py
index ba5ffe7278..530687f729 100644
--- a/benchmarks/terminal_bench/mux_agent.py
+++ b/benchmarks/terminal_bench/mux_agent.py
@@ -78,6 +78,7 @@ class MuxAgent(BaseInstalledAgent):
         # Generic pass-through for arbitrary mux run CLI flags (e.g., --thinking
         # high --use-1m --budget 5.00). Avoids per-flag plumbing.
         "MUX_RUN_ARGS",
+        "MUX_RUN_AS_GOAL",
     )
 
     def __init__(
@@ -167,12 +168,31 @@ def _env(self) -> dict[str, str]:
             if not project_path.strip():
                 raise ValueError("MUX_PROJECT_PATH must be non-empty when provided")
 
+        mux_run_as_goal = self._normalize_mux_run_as_goal(env.get("MUX_RUN_AS_GOAL"))
+        if mux_run_as_goal is None:
+            env.pop("MUX_RUN_AS_GOAL", None)
+        else:
+            env["MUX_RUN_AS_GOAL"] = mux_run_as_goal
+
         # Set experiments from kwarg (takes precedence over env var)
         if self._experiments:
             env["MUX_EXPERIMENTS"] = self._experiments
 
         return env
 
+    @staticmethod
+    def _normalize_mux_run_as_goal(value: str | None) -> str | None:
+        """Return "1" when goal mode is enabled, None when unset or disabled."""
+        if value is None:
+            return None
+        normalized = value.strip().lower()
+        if normalized in ("", "0", "false"):
+            return None
+        if normalized in ("1", "true"):
+            return "1"
+        # Reject unknown spellings instead of silently treating them as disabled.
+        raise ValueError("MUX_RUN_AS_GOAL must be one of: 1, true, 0, false")
+
     @property
     def _install_agent_template_path(self) -> Path:
         return Path(__file__).with_name("mux_setup.sh.j2")
@@ -288,6 +308,7 @@ async def run(
     ) -> None:
         """Run agent commands, download token file, then populate context."""
         # Execute commands (from base class logic, but without calling populate_context)
+        failed_command: tuple[int, int] | None = None
         for i, exec_input in enumerate(self.create_run_agent_commands(instruction)):
             command_dir = self.logs_dir / f"command-{i}"
             command_dir.mkdir(parents=True, exist_ok=True)
@@ -305,6 +326,9 @@ async def run(
                 (command_dir / "stdout.txt").write_text(result.stdout)
             if result.stderr:
                 (command_dir / "stderr.txt").write_text(result.stderr)
+            if result.return_code != 0:
+                failed_command = (i, result.return_code)
+                break
 
         # Download token file from container BEFORE populating context
         # Clear any stale token file first to avoid reading outdated data if download fails
@@ -317,6 +341,12 @@ async def run(
 
         self.populate_context_post_run(context)
 
+        if failed_command is not None:
+            command_index, return_code = failed_command
+            raise RuntimeError(
+                f"mux agent command failed (command {command_index}, exit {return_code})"
+            )
+
     def populate_context_post_run(self, context: AgentContext) -> None:
         """Extract token usage and cost from the token file written by mux-run.sh."""
         token_file = self.logs_dir / "mux-tokens.json"
diff --git a/benchmarks/terminal_bench/mux_agent_test.py b/benchmarks/terminal_bench/mux_agent_test.py
index a3524b840d..469e167d07 100644
--- a/benchmarks/terminal_bench/mux_agent_test.py
+++ b/benchmarks/terminal_bench/mux_agent_test.py
@@ -1,8 +1,11 @@
 from __future__ import annotations
 
+import asyncio
 import io
 import tarfile
+from dataclasses import dataclass
 from pathlib import Path
+from types import SimpleNamespace
 
 import pytest
 
@@ -21,27 +24,127 @@ def _repo_root() -> Path:
     return Path(__file__).resolve().parents[2]
 
 
-def test_env_defaults_are_normalized(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_env_defaults_are_normalized(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
     monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
-    agent = MuxAgent(model_name="anthropic/claude-sonnet-4-5")
+    agent = MuxAgent(logs_dir=tmp_path, model_name="anthropic/claude-sonnet-4-5")
 
     env = agent._env
 
     assert env["MUX_MODEL"] == "anthropic:claude-sonnet-4-5"
-    assert env["MUX_THINKING_LEVEL"] == "high"
-    assert env["MUX_MODE"] == "exec"
     assert env["MUX_PROJECT_CANDIDATES"] == agent._DEFAULT_PROJECT_CANDIDATES
 
 
-def test_timeout_must_be_numeric(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_goal_mode_env_is_forwarded(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
+    monkeypatch.setenv("MUX_RUN_AS_GOAL", "true")
+
+    agent = MuxAgent(logs_dir=tmp_path)
+
+    assert agent._env["MUX_RUN_AS_GOAL"] == "1"
+
+
+def test_goal_mode_defaults_to_disabled(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
+
+    agent = MuxAgent(logs_dir=tmp_path)
+
+    assert "MUX_RUN_AS_GOAL" not in agent._env
+
+
+def test_goal_mode_rejects_invalid_values(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
+    monkeypatch.setenv("MUX_RUN_AS_GOAL", "yes")
+
+    agent = MuxAgent(logs_dir=tmp_path)
+    with pytest.raises(ValueError, match="MUX_RUN_AS_GOAL"):
+        _ = agent._env
+
+
+def test_timeout_must_be_numeric(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
     monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
     monkeypatch.setenv("MUX_TIMEOUT_MS", "not-a-number")
 
-    agent = MuxAgent()
+    agent = MuxAgent(logs_dir=tmp_path)
     with pytest.raises(ValueError):
         _ = agent._env
 
 
+@dataclass
+class _ExecResult:
+    return_code: int
+    stdout: str = ""
+    stderr: str = ""
+
+
+class _FakeEnvironment:
+    def __init__(self, result: _ExecResult) -> None:
+        self.result = result
+        self.download_attempts: list[tuple[str, Path]] = []
+
+    async def exec(self, **_kwargs: object) -> _ExecResult:
+        return self.result
+
+    async def download_file(self, source_path: str, target_path: Path) -> None:
+        self.download_attempts.append((source_path, target_path))
+        target_path.write_text('{"input": 7, "output": 11, "cost_usd": 0.42}')
+
+
+def test_run_raises_after_preserving_logs_for_nonzero_exit(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
+    agent = MuxAgent(logs_dir=tmp_path)
+    environment = _FakeEnvironment(
+        _ExecResult(return_code=3, stdout="out", stderr="err")
+    )
+    context = SimpleNamespace()
+
+    with pytest.raises(RuntimeError, match="mux agent command failed"):
+        asyncio.run(agent.run("do the task", environment, context))
+
+    command_dir = tmp_path / "command-0"
+    assert (command_dir / "return-code.txt").read_text() == "3"
+    assert (command_dir / "stdout.txt").read_text() == "out"
+    assert (command_dir / "stderr.txt").read_text() == "err"
+    assert environment.download_attempts == [
+        (agent._TOKEN_FILE_PATH, tmp_path / "mux-tokens.json")
+    ]
+    assert getattr(context, "n_input_tokens") == 7
+    assert getattr(context, "n_output_tokens") == 11
+    assert getattr(context, "cost_usd") == 0.42
+
+
+def test_run_populates_context_for_successful_exit(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.setenv("MUX_AGENT_REPO_ROOT", str(_repo_root()))
+    agent = MuxAgent(logs_dir=tmp_path)
+    environment = _FakeEnvironment(
+        _ExecResult(return_code=0, stdout="out", stderr="err")
+    )
+    context = SimpleNamespace()
+
+    asyncio.run(agent.run("do the task", environment, context))
+
+    command_dir = tmp_path / "command-0"
+    assert (command_dir / "return-code.txt").read_text() == "0"
+    assert (command_dir / "stdout.txt").read_text() == "out"
+    assert (command_dir / "stderr.txt").read_text() == "err"
+    assert getattr(context, "n_input_tokens") == 7
+    assert getattr(context, "n_output_tokens") == 11
+    assert getattr(context, "cost_usd") == 0.42
+
+
 def test_app_archive_includes_postinstall_script() -> None:
     assert "scripts/postinstall.sh" in MuxAgent._INCLUDE_PATHS
 
diff --git a/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md b/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md
new file mode 100644
index 0000000000..41e2216045
--- /dev/null
+++ b/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md
@@ -0,0 +1,31 @@
+---
+title: CLI Goal Runs are not strict /goal aliases
+description: Architecture decision for giving mux run --goal CLI-specific completion and limit semantics
+---
+
+# 0004. CLI Goal Runs are not strict /goal aliases
+
+## Status
+
+Accepted
+
+## Context
+
+`mux run` is designed for automation: it normally sends one request, streams the result, and exits. Interactive `/goal` is a workspace lifecycle command with defaults, controls, and cooldown behavior that assume a user can intervene from the UI.
+
+Adding `mux run --goal` creates a different automation need. A script needs one process to keep driving an objective until there is an authoritative completion signal, while still preserving goal accounting and model-facing goal tools.
+
+## Decision
+
+Mux will model `mux run --goal` as a CLI Goal Run, not as a strict alias for interactive `/goal`.
+
+A CLI Goal Run creates an ephemeral goal for the `mux run` process, sends either the provided message/stdin or the goal text as the kickoff message, and continues in exec mode until the persisted goal status is `complete` or a stop condition is reached. Interactive goal defaults are not applied; omitted `--goal-budget` and `--goal-turns` mean no goal-specific limit. The existing session `--budget` remains a separate hard stop.
+
+CLI Goal Runs bypass the interactive goal continuation cooldown because the process itself is the automation boundary. They still use the shared goal service for prompts, accounting, tool availability, budget-limited wrap-up, and persisted completion state.
+
+## Consequences
+
+- `mux run` remains single-request by default, with `--goal` documented as the explicit multi-continuation exception.
+- Scripts can trust exit code `0` only when the persisted goal is complete; free-text completion claims are not enough unless the existing goal-completion fallback has persisted them.
+- Goal and session budgets can stop the same process for different reasons, so CLI output and JSON events must identify which limit won.
+- CLI-specific continuation behavior is parameterized in the shared goal service instead of duplicating goal prompt/accounting logic in the CLI.
diff --git a/docs/docs.json b/docs/docs.json
index 217683d273..6fadfcdd6b 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -123,6 +123,7 @@
               "adr/0001-experimental-image-generation-tool",
               "adr/0002-image-editing-visual-mockups",
               "adr/0003-context-boundaries-for-compaction-and-reset",
+              "adr/0004-cli-goal-runs-are-not-strict-goal-aliases",
               "AGENTS"
             ]
           }
diff --git a/docs/reference/cli.mdx b/docs/reference/cli.mdx
index eac228cb42..5b7c6264f5 100644
--- a/docs/reference/cli.mdx
+++ b/docs/reference/cli.mdx
@@ -10,7 +10,7 @@ description: Run one-off agent tasks from the command line with `mux run`
   Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) or similar TUIs.
 </Note>
 
-Mux provides a CLI for running one-off agent tasks without the desktop app. Unlike the interactive desktop experience, `mux run` executes a single request to completion and exits.
+Mux provides a CLI for running one-off agent tasks without the desktop app. Unlike the interactive desktop experience, `mux run` normally executes a single request to completion and exits. The `--goal` option is an explicit exception: it starts a CLI Goal Run that may perform automatic continuations until the goal is complete or a limit is reached.
 
 <Card title="GitHub Actions Guide" icon="github" href="/guides/github-actions">
   Learn how to use `mux run` in CI/CD pipelines
@@ -62,10 +62,42 @@ mux run --json "List all TypeScript files" | jq '.type'
 | `--mode <mode>`       |       | Agent mode: `plan` or `exec`                                                                                                  | `exec`            |
 | `--thinking <level>`  | `-t`  | Thinking level: `OFF`, `LOW`, `MED`, `HIGH`, `MAX`, or `0`–`9` (model-relative, see [Models](/config/models#thinking-levels)) | `MED`             |
 | `--budget <usd>`      | `-b`  | Stop when session cost exceeds budget (USD)                                                                                   | No limit          |
+| `--goal <objective>`  |       | Start a CLI Goal Run and continue until the persisted goal is complete or a limit stops it                                    | Off               |
+| `--goal-budget <n>`   |       | Goal budget (`$5`, `5.00`, or `500c`); separate from `--budget`                                                               | No limit          |
+| `--goal-turns <n>`    |       | Maximum automatic goal continuation turns                                                                                     | No limit          |
 | `--experiment <id>`   | `-e`  | Enable experiment (repeatable)                                                                                                | None              |
 | `--json`              |       | Output NDJSON for programmatic use                                                                                            | Off               |
 | `--quiet`             | `-q`  | Only output final result                                                                                                      | Off               |
 
+### CLI Goal Runs
+
+Use `--goal` when a task should keep going across automatic continuations until the agent marks the persisted goal complete:
+
+```bash
+# Goal text is also used as the initial message when no message/stdin is provided
+mux run --goal "Fix the failing tests and verify the suite passes"
+
+# Provide separate kickoff instructions while keeping the objective active
+mux run --goal "Ship the migration safely" "Start by inspecting the schema and propose a plan"
+
+# Bound automatic continuations with a goal-specific budget and turn cap
+mux run --goal "Complete the refactor" --goal-budget 5.00 --goal-turns 10
+```
+
+A CLI Goal Run is intentionally not a strict alias for interactive `/goal`. It is ephemeral to the `mux run` process, does not apply interactive goal defaults, bypasses the interactive continuation cooldown, and exits successfully only when the persisted goal status is `complete`. If neither `--goal-budget` nor `--goal-turns` is provided, Mux warns that the goal is uncapped.
+
+`--budget` remains the hard session spending limit in USD. `--goal-budget` is a separate limit tracked by goal accounting; it accepts forms like `$5`, `5.00`, and `500c`, and may allow a final budget-limit wrap-up turn. If the session `--budget` is exceeded, the run stops immediately.
+
+Exit codes for CLI Goal Runs:
+
+| Code  | Meaning                                                    |
+| ----- | ---------------------------------------------------------- |
+| `0`   | Goal completed (unless the agent set a nonzero exit code)  |
+| `1`   | Operational, model, or tool error                          |
+| `2`   | Session `--budget` exceeded                                |
+| `3`   | Goal stopped incomplete, including goal budget/turn limits |
+| `130` | User interrupt                                             |
+
 ### Runtimes
 
 - **`local`** (default): Runs directly in the specified directory. Best for one-off tasks.
@@ -82,6 +114,9 @@ mux run --json "List all TypeScript files" | jq '.type'
 ### Examples
 
 ```bash
+# Goal run with automatic continuations
+mux run --goal "Update dependencies, fix resulting tests, and verify the suite passes"
+
 # Quick fix in current directory
 mux run "Fix the TypeScript errors"
 
diff --git a/scripts/upload-harbor-results.py b/scripts/upload-harbor-results.py
index a820b6788c..878d465171 100644
--- a/scripts/upload-harbor-results.py
+++ b/scripts/upload-harbor-results.py
@@ -44,6 +44,11 @@ def load_json(path: Path) -> dict | None:
         return None
 
 
+def env_flag(name: str) -> bool:
+    """Return True if env var `name` is "1" or "true" (trimmed, case-insensitive)."""
+    return (os.environ.get(name) or "").strip().lower() in {"1", "true"}
+
+
 def extract_trial_score(trial_result: dict) -> float | None:
     """Extract score from trial result, supporting multiple Harbor formats."""
     score = trial_result.get("score")
@@ -185,6 +190,7 @@ def build_rows(job_folder: Path) -> list[dict]:
         dataset = job_config.get("dataset")
 
     experiments = os.environ.get("MUX_EXPERIMENTS")
+    mux_run_as_goal = env_flag("MUX_RUN_AS_GOAL")
 
     # Raw JSON for future-proofing
     run_result_json = json.dumps(job_result) if job_result else None
@@ -233,6 +239,7 @@ def build_rows(job_folder: Path) -> list[dict]:
             "mode": mode,
             "dataset": dataset,
             "experiments": experiments,
+            "mux_run_as_goal": mux_run_as_goal,
             "run_started_at": None,  # Not available in Harbor format
             "run_completed_at": None,
             "n_resolved": None,  # Will be set after counting all trials
diff --git a/scripts/upload-tbench-results.py b/scripts/upload-tbench-results.py
index 809ade170f..549a95f82b 100755
--- a/scripts/upload-tbench-results.py
+++ b/scripts/upload-tbench-results.py
@@ -42,6 +42,9 @@ def load_json(path: Path) -> dict | None:
         return None
 
 
+def env_flag(name: str) -> bool:
+    """Return True if env var `name` is "1" or "true" (trimmed, case-insensitive)."""
+    return (os.environ.get(name) or "").strip().lower() in {"1", "true"}
 
 
 def extract_thinking_from_config(config: dict) -> str | None:
@@ -198,6 +201,7 @@ def build_rows(job_folder: Path) -> list[dict]:
         dataset = job_config.get("dataset")
 
     experiments = os.environ.get("MUX_EXPERIMENTS")
+    mux_run_as_goal = env_flag("MUX_RUN_AS_GOAL")
 
     # Raw JSON for future-proofing
     run_result_json = json.dumps(job_result) if job_result else None
@@ -250,6 +254,7 @@ def build_rows(job_folder: Path) -> list[dict]:
             "mode": mode,
             "dataset": dataset,
             "experiments": experiments,
+            "mux_run_as_goal": mux_run_as_goal,
             "run_started_at": None,  # Not available in Harbor format
             "run_completed_at": None,
             "n_resolved": None,  # Will be set after counting all trials
diff --git a/src/cli/goalRunDriver.test.ts b/src/cli/goalRunDriver.test.ts
new file mode 100644
index 0000000000..7f3f513445
--- /dev/null
+++ b/src/cli/goalRunDriver.test.ts
@@ -0,0 +1,278 @@
+import { describe, expect, test } from "bun:test";
+import type { GoalRecordV1 } from "@/common/types/goal";
+import type { SendMessageOptions } from "@/common/orpc/types";
+import {
+  describeCliGoalStop,
+  driveCliGoalUntilTerminal,
+  type DriveCliGoalUntilTerminalOptions,
+} from "./goalRunDriver";
+
+function goal(overrides: Partial<GoalRecordV1> = {}): GoalRecordV1 {
+  return {
+    version: 1,
+    goalId: "goal-1",
+    objective: "finish",
+    status: "active",
+    budgetCents: null,
+    costCents: 0,
+    costMicroCents: 0,
+    turnCap: null,
+    turnsUsed: 0,
+    attributedChildren: [],
+    budgetLimitInjectedForGoalId: null,
+    requireUserAcknowledgmentSinceMs: null,
+    lastContinuationFiredAtMs: null,
+    createdAtMs: 1,
+    updatedAtMs: 1,
+    ...overrides,
+  };
+}
+
+function sendOptions(): SendMessageOptions {
+  return { model: "openai:gpt-4o", agentId: "exec" };
+}
+
+function options(
+  overrides: Partial<DriveCliGoalUntilTerminalOptions> = {}
+): DriveCliGoalUntilTerminalOptions {
+  return {
+    workspaceId: "workspace-1",
+    getGoal: () => Promise.resolve(goal()),
+    buildExecSendOptions: sendOptions,
+    requestContinuationAfterStreamEnd: () => Promise.resolve(),
+    requestDispatch: () => Promise.resolve(),
+    checkGoalContinuationEligibility: () => Promise.resolve({}),
+    prepareForContinuation: () => undefined,
+    waitForStreamStarted: () => Promise.resolve(),
+    waitForCompletion: () => Promise.resolve(),
+    isSessionBudgetExceeded: () => false,
+    nowMs: () => 123,
+    emitJsonLine: () => undefined,
+    writeHumanLineClosed: () => undefined,
+    setGoalStopReason: () => undefined,
+    describeError: String,
+    ...overrides,
+  };
+}
+
+describe("driveCliGoalUntilTerminal", () => {
+  test("continues an active goal until completion", async () => {
+    const goals = [goal(), goal({ status: "complete", completionSummary: "done" })];
+    const events: unknown[] = [];
+    const lines: string[] = [];
+    const reasons: string[] = [];
+    const continuations: Array<{ streamEndedAtMs: number; sendOptions: SendMessageOptions }> = [];
+    let dispatches = 0;
+    let waitStarts = 0;
+    let waitCompletions = 0;
+
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goals.shift() ?? goals[goals.length - 1] ?? null),
+        requestContinuationAfterStreamEnd: (input) => {
+          continuations.push(input);
+          return Promise.resolve();
+        },
+        requestDispatch: () => {
+          dispatches += 1;
+          return Promise.resolve();
+        },
+        waitForStreamStarted: () => {
+          waitStarts += 1;
+          return Promise.resolve();
+        },
+        waitForCompletion: () => {
+          waitCompletions += 1;
+          return Promise.resolve();
+        },
+        emitJsonLine: (event) => events.push(event),
+        writeHumanLineClosed: (line = "") => lines.push(line),
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result?.status).toBe("complete");
+    expect(continuations).toHaveLength(1);
+    expect(continuations[0]?.streamEndedAtMs).toBe(123);
+    expect(dispatches).toBe(1);
+    expect(waitStarts).toBe(1);
+    expect(waitCompletions).toBe(1);
+    expect(events).toMatchObject([{ type: "goal-continuing" }, { type: "goal-completed" }]);
+    expect(lines).toEqual(["[goal] continuing...", "[goal] completed: done"]);
+    expect(reasons).toEqual(["complete"]);
+  });
+
+  test("passes the stream-start timeout to continuation waits", async () => {
+    const goals = [goal(), goal({ status: "complete" })];
+    const timeouts: Array<number | undefined> = [];
+
+    await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goals.shift() ?? goals[goals.length - 1] ?? null),
+        streamStartTimeoutMs: 123,
+        waitForStreamStarted: (timeoutMs) => {
+          timeouts.push(timeoutMs);
+          return Promise.resolve();
+        },
+      })
+    );
+
+    expect(timeouts).toEqual([123]);
+  });
+
+  test("drives a budget-limited goal through its wrap-up", async () => {
+    const goals = [
+      goal({ status: "budget_limited", budgetCents: 100, costCents: 100 }),
+      goal({ status: "complete", completionSummary: "wrapped" }),
+    ];
+    const lines: string[] = [];
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goals.shift() ?? goals[goals.length - 1] ?? null),
+        writeHumanLineClosed: (line = "") => lines.push(line),
+      })
+    );
+
+    expect(result?.status).toBe("complete");
+    expect(lines).toEqual(["[goal] budget wrap-up...", "[goal] completed: wrapped"]);
+  });
+
+  test("stops when a budget wrap-up already fired", async () => {
+    const reasons: string[] = [];
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () =>
+          Promise.resolve(
+            goal({
+              status: "budget_limited",
+              budgetCents: 100,
+              costCents: 100,
+              budgetLimitInjectedForGoalId: "goal-1",
+            })
+          ),
+        requestContinuationAfterStreamEnd: () => Promise.reject(new Error("should not continue")),
+        requestDispatch: () => Promise.reject(new Error("should not dispatch")),
+        prepareForContinuation: () => {
+          throw new Error("should not prepare");
+        },
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result?.status).toBe("budget_limited");
+    expect(reasons).toEqual(["goal budget reached"]);
+  });
+
+  test("returns the latest goal when session budget stops after a continuation", async () => {
+    const goals = [goal(), goal({ turnsUsed: 1 })];
+    const reasons: string[] = [];
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goals.shift() ?? null),
+        isSessionBudgetExceeded: () => true,
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result?.turnsUsed).toBe(1);
+    expect(reasons).toEqual(["session budget exceeded"]);
+  });
+
+  test("reports completion when the goal completes during a session-budgeted continuation", async () => {
+    const goals = [goal(), goal({ status: "complete", completionSummary: "finished" })];
+    const events: unknown[] = [];
+    const lines: string[] = [];
+    const reasons: string[] = [];
+
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goals.shift() ?? goals[goals.length - 1] ?? null),
+        isSessionBudgetExceeded: () => true,
+        emitJsonLine: (event) => events.push(event),
+        writeHumanLineClosed: (line = "") => lines.push(line),
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result?.status).toBe("complete");
+    expect(reasons).toEqual(["complete"]);
+    expect(events).toMatchObject([{ type: "goal-continuing" }, { type: "goal-completed" }]);
+    expect(lines).toEqual(["[goal] continuing...", "[goal] completed: finished"]);
+  });
+
+  test("throws when the continuation safety limit is reached", () =>
+    expect(driveCliGoalUntilTerminal(options({ continuationSafetyLimit: 1 }))).rejects.toThrow(
+      "continuation safety guard"
+    ));
+
+  test("returns null when the goal disappears", async () => {
+    const reasons: string[] = [];
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(null),
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result).toBeNull();
+    expect(reasons).toEqual(["goal missing"]);
+  });
+
+  test("returns paused goals without requesting another continuation", async () => {
+    const reasons: string[] = [];
+    const result = await driveCliGoalUntilTerminal(
+      options({
+        getGoal: () => Promise.resolve(goal({ status: "paused" })),
+        requestContinuationAfterStreamEnd: () => Promise.reject(new Error("should not continue")),
+        setGoalStopReason: (reason) => reasons.push(reason),
+      })
+    );
+
+    expect(result?.status).toBe("paused");
+    expect(reasons).toEqual(["goal paused"]);
+  });
+
+  test("reports continuation eligibility when no stream starts", () =>
+    expect(
+      driveCliGoalUntilTerminal(
+        options({
+          checkGoalContinuationEligibility: () => Promise.resolve({ reason: "cooldown" }),
+          waitForStreamStarted: () => Promise.reject(new Error("timeout")),
+          waitForCompletion: () => Promise.reject(new Error("should not wait for completion")),
+        })
+      )
+    ).rejects.toThrow("CLI Goal Run made no progress (cooldown)"));
+});
+
+describe("describeCliGoalStop", () => {
+  const cases: Array<[string, GoalRecordV1 | null, string]> = [
+    ["missing goal", null, "goal missing"],
+    [
+      "budget and turn caps reached",
+      goal({
+        status: "budget_limited",
+        budgetCents: 100,
+        costCents: 100,
+        turnCap: 2,
+        turnsUsed: 2,
+      }),
+      "goal budget and turn caps reached",
+    ],
+    [
+      "budget cap reached",
+      goal({ status: "budget_limited", budgetCents: 100, costCents: 100 }),
+      "goal budget reached",
+    ],
+    [
+      "turn cap reached",
+      goal({ status: "budget_limited", turnCap: 2, turnsUsed: 2 }),
+      "goal turn cap reached",
+    ],
+    ["generic limit reached", goal({ status: "budget_limited" }), "goal limit reached"],
+    ["paused goal", goal({ status: "paused" }), "goal paused"],
+  ];
+
+  test.each(cases)("describes %s", (_name, input, expected) => {
+    expect(describeCliGoalStop(input)).toBe(expected);
+  });
+});
diff --git a/src/cli/goalRunDriver.ts b/src/cli/goalRunDriver.ts
new file mode 100644
index 0000000000..feab26ad41
--- /dev/null
+++ b/src/cli/goalRunDriver.ts
@@ -0,0 +1,130 @@
+import assert from "@/common/utils/assert";
+import type { GoalRecordV1 } from "@/common/types/goal";
+import type { SendMessageOptions } from "@/common/orpc/types";
+import { CLI_GOAL_CONTINUATION_SAFETY_LIMIT } from "@/constants/goals";
+
+interface EligibilityHint {
+  reason?: string | null;
+}
+
+interface GoalContinuationRequest {
+  sendOptions: SendMessageOptions;
+  streamEndedAtMs: number;
+}
+
+export interface DriveCliGoalUntilTerminalOptions {
+  workspaceId: string;
+  getGoal: () => Promise<GoalRecordV1 | null>;
+  buildExecSendOptions: () => SendMessageOptions;
+  requestContinuationAfterStreamEnd: (input: GoalContinuationRequest) => Promise<void>;
+  requestDispatch: () => Promise<void>;
+  checkGoalContinuationEligibility: (nowMs: number) => Promise<EligibilityHint>;
+  prepareForContinuation: () => void;
+  waitForStreamStarted: (timeoutMs?: number) => Promise<void>;
+  waitForCompletion: () => Promise<void>;
+  isSessionBudgetExceeded: () => boolean;
+  nowMs: () => number;
+  emitJsonLine: (payload: unknown) => void;
+  writeHumanLineClosed: (text?: string) => void;
+  setGoalStopReason: (reason: string) => void;
+  describeError: (error: unknown) => string;
+  continuationSafetyLimit?: number;
+  streamStartTimeoutMs?: number;
+}
+
+/** Reports a completed goal (stop reason, JSON event, human line) identically at each call site. */
+function recordCliGoalCompleted(
+  opts: DriveCliGoalUntilTerminalOptions,
+  goal: GoalRecordV1
+): GoalRecordV1 {
+  opts.setGoalStopReason("complete");
+  opts.emitJsonLine({
+    type: "goal-completed",
+    workspaceId: opts.workspaceId,
+    goalId: goal.goalId,
+    completionSummary: goal.completionSummary ?? null,
+  });
+  opts.writeHumanLineClosed(`[goal] completed: ${goal.completionSummary ?? "complete"}`);
+  return goal;
+}
+
+/** Maps a goal record (or null) to the stop-reason string used in CLI JSON and human output. */
+export function describeCliGoalStop(goal: GoalRecordV1 | null): string {
+  if (!goal) return "goal missing";
+  if (goal.status === "budget_limited") {
+    const hitTurnCap = goal.turnCap != null && goal.turnsUsed >= goal.turnCap;
+    const hitBudget = goal.budgetCents != null && goal.costCents >= goal.budgetCents;
+    if (hitBudget && hitTurnCap) return "goal budget and turn caps reached";
+    if (hitBudget) return "goal budget reached";
+    if (hitTurnCap) return "goal turn cap reached";
+    return "goal limit reached"; // budget_limited, but neither cap check above matched
+  }
+  return `goal ${goal.status}`; // any other status, e.g. "goal paused"
+}
+
+/**
+ * Requests continuations until the goal is complete, paused, missing (null), budget-limited
+ * with its wrap-up already injected, or the session budget is exceeded; returns the last goal
+ * record read. Throws when the safety limit is hit, no stream starts, or a callback rejects.
+ */
+export async function driveCliGoalUntilTerminal(
+  opts: DriveCliGoalUntilTerminalOptions
+): Promise<GoalRecordV1 | null> {
+  const continuationSafetyLimit =
+    opts.continuationSafetyLimit ?? CLI_GOAL_CONTINUATION_SAFETY_LIMIT;
+  const streamStartTimeoutMs = opts.streamStartTimeoutMs;
+  let continuationCount = 0;
+
+  while (true) {
+    const goal = await opts.getGoal();
+    if (goal?.status === "complete") {
+      return recordCliGoalCompleted(opts, goal);
+    }
+    if (!goal || goal.status === "paused") {
+      opts.setGoalStopReason(describeCliGoalStop(goal));
+      return goal;
+    }
+    if (goal.status === "budget_limited" && goal.budgetLimitInjectedForGoalId === goal.goalId) {
+      opts.setGoalStopReason(describeCliGoalStop(goal));
+      return goal;
+    }
+
+    continuationCount += 1; // counted before the guard, so a limit of N allows N-1 continuations
+    assert(
+      continuationCount < continuationSafetyLimit,
+      "CLI Goal Run exceeded the continuation safety guard"
+    );
+    opts.prepareForContinuation();
+    const phase = goal.status === "budget_limited" ? "budget wrap-up" : "continuing";
+    opts.emitJsonLine({
+      type: "goal-continuing",
+      workspaceId: opts.workspaceId,
+      goalId: goal.goalId,
+      status: goal.status,
+      continuation: continuationCount,
+    });
+    opts.writeHumanLineClosed(`[goal] ${phase}...`);
+    await opts.requestContinuationAfterStreamEnd({
+      sendOptions: opts.buildExecSendOptions(),
+      streamEndedAtMs: opts.nowMs(),
+    });
+    await opts.requestDispatch();
+    try {
+      await opts.waitForStreamStarted(streamStartTimeoutMs);
+    } catch (error) {
+      const eligibility = await opts.checkGoalContinuationEligibility(opts.nowMs());
+      throw new Error(
+        `CLI Goal Run made no progress (${eligibility.reason ?? opts.describeError(error)})`
+      );
+    }
+    await opts.waitForCompletion();
+    if (opts.isSessionBudgetExceeded()) {
+      const latestGoal = await opts.getGoal();
+      if (latestGoal?.status === "complete") {
+        return recordCliGoalCompleted(opts, latestGoal);
+      }
+      opts.setGoalStopReason("session budget exceeded"); // latest goal is not complete here
+      return latestGoal;
+    }
+  }
+}
diff --git a/src/cli/run.test.ts b/src/cli/run.test.ts
index 876081af49..f2f6a1d14d 100644
--- a/src/cli/run.test.ts
+++ b/src/cli/run.test.ts
@@ -157,6 +157,9 @@ describe("mux CLI", () => {
       expect(result.stdout).toContain("--mode");
       expect(result.stdout).toContain("--thinking");
       expect(result.stdout).toContain("--hide-costs");
+      expect(result.stdout).toContain("--goal");
+      expect(result.stdout).toContain("--goal-budget");
+      expect(result.stdout).toContain("--goal-turns");
       expect(result.stdout).toContain("--json");
       expect(result.stdout).toContain("--quiet");
     });
@@ -181,6 +184,42 @@ describe("mux CLI", () => {
       expect(result.output).toContain("No message provided");
     });
 
+    test("empty --goal shows a goal-specific error", async () => {
+      const result = await runRunDirect(["--goal", ""]);
+      expect(result.exitCode).toBe(1);
+      expect(result.output).toContain("--goal requires a non-empty objective");
+    });
+
+    test("--goal supplies the initial message when no message or stdin is provided", async () => {
+      const result = await runRunDirect([
+        "--goal",
+        "finish the objective",
+        "--dir",
+        "/nonexistent/path/for/goal/test",
+      ]);
+      expect(result.output).not.toContain("No message provided");
+      expect(result.output).not.toContain("--goal requires a non-empty objective");
+      expect(result.exitCode).toBe(1);
+    });
+
+    test("--goal-budget and --goal-turns require --goal", async () => {
+      const result = await runRunDirect(["--goal-budget", "5", "test message"]);
+      expect(result.exitCode).toBe(1);
+      expect(result.output).toContain("--goal-budget and --goal-turns require --goal");
+    });
+
+    test("invalid --goal-budget shows error", async () => {
+      const result = await runRunDirect(["--goal", "ship", "--goal-budget", "five"]);
+      expect(result.exitCode).toBe(1);
+      expect(result.output).toContain("Invalid --goal-budget");
+    });
+
+    test("invalid --goal-turns shows error", async () => {
+      const result = await runRunDirect(["--goal", "ship", "--goal-turns", "0"]);
+      expect(result.exitCode).toBe(1);
+      expect(result.output).toContain("Invalid --goal-turns");
+    });
+
     test("xhigh thinking level is accepted", async () => {
       const result = await runRunDirect([
         "--thinking",
diff --git a/src/cli/run.ts b/src/cli/run.ts
index 7596cdab67..e4371d0354 100644
--- a/src/cli/run.ts
+++ b/src/cli/run.ts
@@ -77,6 +77,16 @@ import { execSync } from "child_process";
 import { getParseOptions } from "./argv";
 import { EXPERIMENT_IDS } from "../common/constants/experiments";
 import { getErrorMessage } from "@/common/utils/errors";
+import { describeCliGoalStop, driveCliGoalUntilTerminal } from "./goalRunDriver";
+import {
+  parseGoalBudgetInputCents,
+  parseGoalTurnCapInput,
+} from "@/common/utils/goals/budgetParser";
+import {
+  CLI_GOAL_STREAM_START_TIMEOUT_MS,
+  GOAL_CONTINUATION_IDLE_CONSUMER_NAME,
+} from "@/constants/goals";
+import type { GoalRecordV1 } from "@/common/types/goal";
 
 // Display labels for CLI help (OFF, LOW, MED, HIGH, MAX).
 // Deduplicate because xhigh and max both display as "MAX" for default/Anthropic
@@ -150,6 +160,26 @@ function parseMode(value: string | undefined): CLIMode {
   throw new Error(`Invalid mode "${value}". Expected: plan, exec`);
 }
 
+function parseGoalBudgetFlag(value: string | undefined): number | null | undefined {
+  if (value == null) return undefined;
+  const parsed = parseGoalBudgetInputCents(value);
+  if (parsed === undefined) {
+    throw new Error(
+      'Invalid --goal-budget "' + value + '". Expected dollars like 5, $5.00, or cents like 500c'
+    );
+  }
+  return parsed;
+}
+
+function parseGoalTurnsFlag(value: string | undefined): number | undefined {
+  if (value == null) return undefined;
+  const parsed = parseGoalTurnCapInput(value);
+  if (parsed == null) {
+    throw new Error('Invalid --goal-turns "' + value + '". Expected a positive integer');
+  }
+  return parsed;
+}
+
 function generateWorkspaceId(): string {
   const timestamp = Date.now();
   const random = Math.random().toString(36).substring(2, 8);
@@ -305,6 +335,9 @@ program
   .option("--no-mcp-config", "ignore global + repo MCP config files (use only --mcp servers)")
   .option("-e, --experiment <id>", "enable experiment (can be repeated)", collectExperiments, [])
   .option("-b, --budget <usd>", "stop when session cost exceeds budget (USD)", parseFloat)
+  .option("--goal <objective>", "drive an ephemeral CLI Goal Run until complete")
+  .option("--goal-budget <budget>", "goal budget, e.g. $5, 5.00, or 500c")
+  .option("--goal-turns <turns>", "maximum automatic goal continuation turns")
   .option("--service-tier <tier>", "OpenAI service tier: auto, default, flex, priority")
   .option("--use-1m", "enable 1M context window for supported Anthropic models")
   .option(
@@ -318,6 +351,8 @@ Examples:
   $ mux run "Fix the failing tests"
   $ mux run --dir /path/to/project "Add authentication"
   $ mux run --runtime "ssh user@host" "Deploy changes"
+  $ mux run --goal "Fix tests and verify they pass"
+  $ mux run --goal "Ship the refactor" --goal-budget 5.00 --goal-turns 10
   $ mux run --mode plan "Refactor the auth module"
   $ mux run --budget 1.50 "Quick code review"
   $ echo "Add logging" | mux run
@@ -344,6 +379,9 @@ interface CLIOptions {
   mcpConfig: boolean;
   experiment: string[];
   budget?: number;
+  goal?: string;
+  goalBudget?: string;
+  goalTurns?: string;
   serviceTier?: ServiceTier;
   use1m?: boolean;
   keepBackgroundProcesses?: boolean;
@@ -371,10 +409,17 @@ async function main(): Promise<number> {
 
   // Get message from arg or stdin
   const stdinMessage = await gatherMessageFromStdin();
-  const message = messageArg?.trim() || stdinMessage.trim();
+  const goalObjective = opts.goal?.trim() ?? "";
+  const hasGoal = opts.goal !== undefined;
+  if (hasGoal && goalObjective.length === 0) {
+    console.error("Error: --goal requires a non-empty objective");
+    process.exit(1);
+  }
+
+  const message = messageArg?.trim() || stdinMessage.trim() || goalObjective;
 
   if (!message) {
-    console.error("Error: No message provided. Pass as argument or pipe via stdin.");
+    console.error("Error: No message provided. Pass as argument, pipe via stdin, or use --goal.");
     console.error('Usage: mux run "Your instruction here"');
     process.exit(1);
   }
@@ -453,6 +498,12 @@ async function main(): Promise<number> {
     }
   }
 
+  const goalBudgetCents = parseGoalBudgetFlag(opts.goalBudget);
+  const goalTurnCap = parseGoalTurnsFlag(opts.goalTurns);
+  if (!hasGoal && (goalBudgetCents !== undefined || goalTurnCap !== undefined)) {
+    console.error("Error: --goal-budget and --goal-turns require --goal");
+    process.exit(1);
+  }
   const suppressHumanOutput = emitJson || quiet;
   const stdoutIsTTY = process.stdout.isTTY === true;
   const stderrIsTTY = process.stderr.isTTY === true;
@@ -508,6 +559,8 @@ async function main(): Promise<number> {
     mcpServerManager,
     providerService,
     workspaceService,
+    workspaceGoalService,
+    idleDispatcher,
   } = createCoreServices({
     config,
     extensionMetadataPath: path.join(tempDir.path, "extensionMetadata.json"),
@@ -516,6 +569,13 @@ async function main(): Promise<number> {
       inlineServers,
       ignoreConfigFile: !opts.mcpConfig,
     },
+    goalServiceOptions: hasGoal
+      ? {
+          continuationCooldownMs: 0,
+          allowUserOriginBudgetWrapup: true,
+          suppressKickoffContinuation: true,
+        }
+      : undefined,
   });
 
   // `mux run` uses createCoreServices directly (without ServiceContainer), so wire
@@ -556,6 +616,7 @@ async function main(): Promise<number> {
     aiService,
     initStateManager,
     backgroundProcessManager,
+    workspaceGoalService,
     keepBackgroundProcesses,
   });
   // Register with WorkspaceService so TaskService operations that target the parent
@@ -718,6 +779,37 @@ async function main(): Promise<number> {
     // Plan agent instructions are handled by the backend (has access to plan file path)
   });
 
+  let goalStopReason: string | null = null;
+  if (hasGoal) {
+    const setGoalResult = await workspaceGoalService.setGoal({
+      workspaceId,
+      objective: goalObjective,
+      budgetCents: goalBudgetCents ?? null,
+      turnCap: goalTurnCap ?? null,
+      initiator: "user",
+    });
+    if (!setGoalResult.success) {
+      throw new Error(`Failed to set CLI goal: ${setGoalResult.error.type}`);
+    }
+    const warning =
+      goalBudgetCents == null && goalTurnCap == null
+        ? "CLI Goal Run has no --goal-budget or --goal-turns limit. It will continue until the goal is complete or another stop condition occurs."
+        : null;
+    if (warning) {
+      emitJsonLine({ type: "goal-warning", workspaceId, warning });
+      writeHumanLine(`[goal] warning: ${warning}`);
+    }
+    emitJsonLine({
+      type: "goal-started",
+      workspaceId,
+      goalId: setGoalResult.data.goalId,
+      objective: goalObjective,
+      budgetCents: setGoalResult.data.budgetCents,
+      turnCap: setGoalResult.data.turnCap,
+    });
+    writeHumanLine(`[goal] started: ${goalObjective}`);
+  }
+
   const liveEvents: WorkspaceChatMessage[] = [];
   let readyForLive = false;
 
@@ -795,8 +887,14 @@ async function main(): Promise<number> {
   let rejectCompletion: ((reason?: unknown) => void) | null = null;
   let completionPromise: Promise<void> = Promise.resolve();
 
+  let resolveStreamStarted: (() => void) | null = null;
+  let streamStartedPromise: Promise<void> = Promise.resolve();
+
   const createCompletionPromise = (): Promise<void> => {
     streamEnded = false;
+    streamStartedPromise = new Promise<void>((resolve) => {
+      resolveStreamStarted = resolve;
+    });
     return new Promise<void>((resolve, reject) => {
       resolveCompletion = resolve;
       rejectCompletion = reject;
@@ -811,9 +909,35 @@ async function main(): Promise<number> {
     }
   };
 
+  const waitForStreamStarted = async (timeoutMs?: number): Promise<void> => {
+    let timer: ReturnType<typeof setTimeout> | null = null;
+    const streamFailedOrEndedBeforeStart = completionPromise.then(() => {
+      throw new Error("Goal continuation stream ended before it started");
+    });
+    const waits: Array<Promise<void>> = [streamStartedPromise, streamFailedOrEndedBeforeStart];
+    if (timeoutMs != null) {
+      waits.push(
+        new Promise<never>((_, reject) => {
+          timer = setTimeout(() => {
+            reject(new Error("Timed out waiting for goal continuation stream to start"));
+          }, timeoutMs);
+          timer.unref?.();
+        })
+      );
+    }
+    try {
+      await Promise.race(waits);
+    } finally {
+      if (timer != null) {
+        clearTimeout(timer);
+      }
+    }
+  };
+
   const resetCompletionHandlers = () => {
     resolveCompletion = null;
     rejectCompletion = null;
+    resolveStreamStarted = null;
   };
 
   const rejectStream = (error: Error) => {
@@ -855,6 +979,11 @@ async function main(): Promise<number> {
     await waitForCompletion();
   };
 
+  const getGoal = async (): Promise<GoalRecordV1 | null> => {
+    if (!hasGoal) return null;
+    return workspaceGoalService.getGoal(workspaceId);
+  };
+
   const handleToolStart = (payload: WorkspaceChatMessage): boolean => {
     if (!isToolCallStart(payload)) return false;
 
@@ -941,6 +1070,7 @@ async function main(): Promise<number> {
         );
         return;
       }
+      resolveStreamStarted?.();
       activeMessageId = payload.messageId;
       return;
     }
@@ -1133,6 +1263,9 @@ async function main(): Promise<number> {
     }
   };
 
+  let finalGoalRecord: GoalRecordV1 | null = null;
+  let goalDriverError: unknown = null;
+
   const unsubscribe = await session.subscribeChat(chatListener);
 
   try {
@@ -1145,7 +1278,10 @@ async function main(): Promise<number> {
       const planWasProposed = planProposed;
       planProposed = false;
       if (initialMode === "plan" && !planWasProposed) {
-        throw new Error("Plan mode was requested, but the assistant never proposed a plan.");
+        const goalAfterFirstTurn = await getGoal();
+        if (!hasGoal || goalAfterFirstTurn?.status !== "budget_limited") {
+          throw new Error("Plan mode was requested, but the assistant never proposed a plan.");
+        }
       }
       if (planWasProposed) {
         writeHumanLineClosed(
@@ -1153,23 +1289,73 @@ async function main(): Promise<number> {
         );
         await sendAndAwait("Plan approved. Execute it.", buildSendOptions("exec"));
       }
+      if (hasGoal && !budgetExceeded) {
+        try {
+          await driveCliGoalUntilTerminal({
+            workspaceId,
+            getGoal,
+            buildExecSendOptions: () => buildSendOptions("exec"),
+            requestContinuationAfterStreamEnd: (input) =>
+              workspaceGoalService.requestContinuationAfterStreamEnd({
+                workspaceId,
+                ...input,
+              }),
+            requestDispatch: () =>
+              idleDispatcher.requestDispatch(workspaceId, GOAL_CONTINUATION_IDLE_CONSUMER_NAME),
+            checkGoalContinuationEligibility: (nowMs) =>
+              workspaceGoalService.checkGoalContinuationEligibility(workspaceId, nowMs),
+            prepareForContinuation: () => {
+              completionPromise = createCompletionPromise();
+            },
+            waitForStreamStarted,
+            waitForCompletion,
+            streamStartTimeoutMs: CLI_GOAL_STREAM_START_TIMEOUT_MS,
+            isSessionBudgetExceeded: () => budgetExceeded,
+            nowMs: Date.now,
+            emitJsonLine,
+            writeHumanLineClosed,
+            setGoalStopReason: (reason) => {
+              goalStopReason = reason;
+            },
+            describeError: getErrorMessage,
+          });
+        } catch (error) {
+          goalDriverError = error;
+          goalStopReason = getErrorMessage(error);
+        }
+      }
+    }
+
+    finalGoalRecord = await getGoal();
+
+    if (
+      budgetExceeded &&
+      hasGoal &&
+      goalStopReason == null &&
+      finalGoalRecord?.status !== "complete"
+    ) {
+      goalStopReason = "session budget exceeded";
     }
 
     // Output final result for --quiet mode
     if (quiet) {
-      let finalEvent: WorkspaceChatMessage | undefined;
-      for (let i = liveEvents.length - 1; i >= 0; i--) {
-        if (isStreamEnd(liveEvents[i])) {
-          finalEvent = liveEvents[i];
-          break;
+      if (finalGoalRecord?.status === "complete" && finalGoalRecord.completionSummary) {
+        console.log(finalGoalRecord.completionSummary);
+      } else {
+        let finalEvent: WorkspaceChatMessage | undefined;
+        for (let i = liveEvents.length - 1; i >= 0; i--) {
+          if (isStreamEnd(liveEvents[i])) {
+            finalEvent = liveEvents[i];
+            break;
+          }
         }
-      }
-      if (finalEvent && isStreamEnd(finalEvent)) {
-        const parts = (finalEvent as unknown as { parts?: unknown[] }).parts ?? [];
-        for (const part of parts) {
-          if (part && typeof part === "object" && "type" in part && part.type === "text") {
-            const text = (part as { text?: string }).text;
-            if (text) console.log(text);
+        if (finalEvent && isStreamEnd(finalEvent)) {
+          const parts = (finalEvent as unknown as { parts?: unknown[] }).parts ?? [];
+          for (const part of parts) {
+            if (part && typeof part === "object" && "type" in part && part.type === "text") {
+              const text = (part as { text?: string }).text;
+              if (text) console.log(text);
+            }
           }
         }
       }
@@ -1194,6 +1380,16 @@ async function main(): Promise<number> {
             }
           : null,
         cost_usd: totalCost ?? null,
+        goal: finalGoalRecord
+          ? {
+              status: finalGoalRecord.status,
+              goalId: finalGoalRecord.goalId,
+              completionSummary: finalGoalRecord.completionSummary ?? null,
+              stopReason: goalStopReason,
+              costCents: finalGoalRecord.costCents,
+              turnsUsed: finalGoalRecord.turnsUsed,
+            }
+          : null,
       });
     }
 
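@@ -1216,8 +1412,20 @@ async function main(): Promise<number> {
     }
   }
 
-  // Exit codes: 2 for budget exceeded, agent-specified exit code, or 0 for success
+  // Exit codes: 2 = session budget exceeded, 3 = goal run did not complete, else agent exit code or 0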
   if (budgetExceeded) return 2;
+  if (hasGoal && (goalDriverError != null || finalGoalRecord?.status !== "complete")) {
+    const reason = goalStopReason ?? describeCliGoalStop(finalGoalRecord);
+    writeHumanLineClosed(`[goal] stopped: ${reason}`);
+    emitJsonLine({
+      type: "goal-incomplete",
+      workspaceId,
+      goalId: finalGoalRecord?.goalId ?? null,
+      status: finalGoalRecord?.status ?? null,
+      stopReason: reason,
+    });
+    return 3;
+  }
   return agentExitCode ?? 0;
 }
 
diff --git a/src/constants/goals.ts b/src/constants/goals.ts
index f73e5a3a2b..f101d8d31f 100644
--- a/src/constants/goals.ts
+++ b/src/constants/goals.ts
@@ -1,6 +1,14 @@
 export const GOAL_CONTINUATION_IDLE_CONSUMER_NAME = "goal_continuation";
 export const GOAL_CONTINUATION_IDLE_CONSUMER_PRIORITY = 100;
 export const DEFAULT_GOAL_CONTINUATION_COOLDOWN_MS = 60_000;
+export const CLI_GOAL_CONTINUATION_SAFETY_LIMIT = 10_000;
+
+/**
+ * Upper bound for waiting on a CLI goal continuation to actually start. This is
+ * intentionally much longer than normal stream startup so slow CI/runtime warmup
+ * does not fail goal runs, while still preventing indefinite benchmark hangs.
+ */
+export const CLI_GOAL_STREAM_START_TIMEOUT_MS = 5 * 60 * 1000;
 export const GOAL_CONTINUATION_KIND = "goal_continuation";
 export const GOAL_BUDGET_LIMIT_KIND = "goal_budget_limit";
 export const GOAL_OBJECTIVE_OPEN_TAG = "<untrusted_objective>";
diff --git a/src/node/builtinSkills/mux-docs.md b/src/node/builtinSkills/mux-docs.md
index c7549df8e2..786ad58e5b 100644
--- a/src/node/builtinSkills/mux-docs.md
+++ b/src/node/builtinSkills/mux-docs.md
@@ -108,6 +108,7 @@ Use this index to find a page's:
     - Experimental Image Generation Tool (`/adr/0001-experimental-image-generation-tool`) → `references/docs/adr/0001-experimental-image-generation-tool.md` — Architecture decision for Mux's experimental image generation tool and generated-image display messages
     - Image Editing Uses a Separate General-Purpose Tool (`/adr/0002-image-editing-visual-mockups`) → `references/docs/adr/0002-image-editing-visual-mockups.md` — Architecture decision for Mux's experimental image editing tool and edited image display messages
     - Context Boundaries for Compaction and Reset (`/adr/0003-context-boundaries-for-compaction-and-reset`) → `references/docs/adr/0003-context-boundaries-for-compaction-and-reset.md` — Architecture decision for modeling provider context windows separately from transcript history
+    - CLI Goal Runs are not strict /goal aliases (`/adr/0004-cli-goal-runs-are-not-strict-goal-aliases`) → `references/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md` — Architecture decision for giving mux run --goal CLI-specific completion and limit semantics
     - AGENTS.md (`/AGENTS`) → `references/docs/AGENTS.md` — Agent instructions for AI assistants working on the Mux codebase
 <!-- END DOCS_TREE -->
 
diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts
index 9a68090ee1..7b42fe3c55 100644
--- a/src/node/services/agentSkills/builtInSkillContent.generated.ts
+++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts
@@ -403,6 +403,40 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "- Persisted boundary metadata should distinguish boundary kinds instead of representing context resets as fake compaction summaries.",
       "",
     ].join("\n"),
+    "references/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md": [
+      "---",
+      "title: CLI Goal Runs are not strict /goal aliases",
+      "description: Architecture decision for giving mux run --goal CLI-specific completion and limit semantics",
+      "---",
+      "",
+      "# 0004. CLI Goal Runs are not strict /goal aliases",
+      "",
+      "## Status",
+      "",
+      "Accepted",
+      "",
+      "## Context",
+      "",
+      "`mux run` is designed for automation: it normally sends one request, streams the result, and exits. Interactive `/goal` is a workspace lifecycle command with defaults, controls, and cooldown behavior that assume a user can intervene from the UI.",
+      "",
+      "Adding `mux run --goal` creates a different automation need. A script needs one process to keep driving an objective until there is an authoritative completion signal, while still preserving goal accounting and model-facing goal tools.",
+      "",
+      "## Decision",
+      "",
+      "Mux will model `mux run --goal` as a CLI Goal Run, not as a strict alias for interactive `/goal`.",
+      "",
+      "A CLI Goal Run creates an ephemeral goal for the `mux run` process, sends either the provided message/stdin or the goal text as the kickoff message, and continues in exec mode until the persisted goal status is `complete` or a stop condition is reached. Interactive goal defaults are not applied; omitted `--goal-budget` and `--goal-turns` mean no goal-specific limit. The existing session `--budget` remains a separate hard stop.",
+      "",
+      "CLI Goal Runs bypass the interactive goal continuation cooldown because the process itself is the automation boundary. They still use the shared goal service for prompts, accounting, tool availability, budget-limited wrap-up, and persisted completion state.",
+      "",
+      "## Consequences",
+      "",
+      "- `mux run` remains single-request by default, with `--goal` documented as the explicit multi-continuation exception.",
+      "- Scripts can trust exit code `0` only when the persisted goal is complete; free-text claims are not enough unless existing goal completion fallback persisted them.",
+      "- Goal and session budgets can stop the same process for different reasons, so CLI output and JSON events must identify which limit won.",
+      "- CLI-specific continuation behavior is parameterized in the shared goal service instead of duplicating goal prompt/accounting logic in the CLI.",
+      "",
+    ].join("\n"),
     "references/docs/AGENTS.md": [
       "---",
       "title: AGENTS.md",
@@ -3523,6 +3557,7 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       '              "adr/0001-experimental-image-generation-tool",',
       '              "adr/0002-image-editing-visual-mockups",',
       '              "adr/0003-context-boundaries-for-compaction-and-reset",',
+      '              "adr/0004-cli-goal-runs-are-not-strict-goal-aliases",',
       '              "AGENTS"',
       "            ]",
       "          }",
@@ -5247,7 +5282,7 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "  Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) or similar TUIs.",
       "</Note>",
       "",
-      "Mux provides a CLI for running one-off agent tasks without the desktop app. Unlike the interactive desktop experience, `mux run` executes a single request to completion and exits.",
+      "Mux provides a CLI for running one-off agent tasks without the desktop app. Unlike the interactive desktop experience, `mux run` normally executes a single request to completion and exits. The `--goal` option is an explicit exception: it starts a CLI Goal Run that may perform automatic continuations until the goal is complete or a limit is reached.",
       "",
       '<Card title="GitHub Actions Guide" icon="github" href="/guides/github-actions">',
       "  Learn how to use `mux run` in CI/CD pipelines",
@@ -5299,10 +5334,42 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "| `--mode <mode>`       |       | Agent mode: `plan` or `exec`                                                                                                  | `exec`            |",
       "| `--thinking <level>`  | `-t`  | Thinking level: `OFF`, `LOW`, `MED`, `HIGH`, `MAX`, or `0`–`9` (model-relative, see [Models](/config/models#thinking-levels)) | `MED`             |",
       "| `--budget <usd>`      | `-b`  | Stop when session cost exceeds budget (USD)                                                                                   | No limit          |",
+      "| `--goal <objective>`  |       | Start a CLI Goal Run and continue until the persisted goal is complete or a limit stops it                                    | Off               |",
+      "| `--goal-budget <n>`   |       | Goal budget (`$5`, `5.00`, or `500c`); separate from `--budget`                                                               | No limit          |",
+      "| `--goal-turns <n>`    |       | Maximum automatic goal continuation turns                                                                                     | No limit          |",
       "| `--experiment <id>`   | `-e`  | Enable experiment (repeatable)                                                                                                | None              |",
       "| `--json`              |       | Output NDJSON for programmatic use                                                                                            | Off               |",
       "| `--quiet`             | `-q`  | Only output final result                                                                                                      | Off               |",
       "",
+      "### CLI Goal Runs",
+      "",
+      "Use `--goal` when a task should keep going across automatic continuations until the agent marks the persisted goal complete:",
+      "",
+      "```bash",
+      "# Goal text is also used as the initial message when no message/stdin is provided",
+      'mux run --goal "Fix the failing tests and verify the suite passes"',
+      "",
+      "# Provide separate kickoff instructions while keeping the objective active",
+      'mux run --goal "Ship the migration safely" "Start by inspecting the schema and propose a plan"',
+      "",
+      "# Bound automatic continuations with a goal-specific budget and turn cap",
+      'mux run --goal "Complete the refactor" --goal-budget 5.00 --goal-turns 10',
+      "```",
+      "",
+      "A CLI Goal Run is intentionally not a strict alias for interactive `/goal`. It is ephemeral to the `mux run` process, does not apply interactive goal defaults, bypasses the interactive continuation cooldown, and exits successfully only when the persisted goal status is `complete`. If neither `--goal-budget` nor `--goal-turns` is provided, Mux warns that the goal is uncapped.",
+      "",
+      "`--budget` remains the hard session spending limit in USD. `--goal-budget` is goal accounting, accepts forms like `$5`, `5.00`, and `500c`, and may allow a final budget-limit wrap-up turn. If the session `--budget` is exceeded, the run stops immediately.",
+      "",
+      "Exit codes for CLI Goal Runs:",
+      "",
+      "| Code  | Meaning                                                    |",
+      "| ----- | ---------------------------------------------------------- |",
+      "| `0`   | Goal completed (unless the agent set a nonzero exit code)  |",
+      "| `1`   | Operational, model, or tool error                          |",
+      "| `2`   | Session `--budget` exceeded                                |",
+      "| `3`   | Goal stopped incomplete, including goal budget/turn limits |",
+      "| `130` | User interrupt                                             |",
+      "",
       "### Runtimes",
       "",
       "- **`local`** (default): Runs directly in the specified directory. Best for one-off tasks.",
@@ -5319,6 +5386,9 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "### Examples",
       "",
       "```bash",
+      "# Goal run with automatic continuations",
+      'mux run --goal "Update dependencies, fix resulting tests, and verify the suite passes"',
+      "",
       "# Quick fix in current directory",
       'mux run "Fix the TypeScript errors"',
       "",
@@ -6599,6 +6669,7 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "    - Experimental Image Generation Tool (`/adr/0001-experimental-image-generation-tool`) → `references/docs/adr/0001-experimental-image-generation-tool.md` — Architecture decision for Mux's experimental image generation tool and generated-image display messages",
       "    - Image Editing Uses a Separate General-Purpose Tool (`/adr/0002-image-editing-visual-mockups`) → `references/docs/adr/0002-image-editing-visual-mockups.md` — Architecture decision for Mux's experimental image editing tool and edited image display messages",
       "    - Context Boundaries for Compaction and Reset (`/adr/0003-context-boundaries-for-compaction-and-reset`) → `references/docs/adr/0003-context-boundaries-for-compaction-and-reset.md` — Architecture decision for modeling provider context windows separately from transcript history",
+      "    - CLI Goal Runs are not strict /goal aliases (`/adr/0004-cli-goal-runs-are-not-strict-goal-aliases`) → `references/docs/adr/0004-cli-goal-runs-are-not-strict-goal-aliases.md` — Architecture decision for giving mux run --goal CLI-specific completion and limit semantics",
       "    - AGENTS.md (`/AGENTS`) → `references/docs/AGENTS.md` — Agent instructions for AI assistants working on the Mux codebase",
       "<!-- END DOCS_TREE -->",
       "",
diff --git a/src/node/services/coreServices.ts b/src/node/services/coreServices.ts
index c672c9d28a..980f5a2f3a 100644
--- a/src/node/services/coreServices.ts
+++ b/src/node/services/coreServices.ts
@@ -16,6 +16,7 @@ import { log } from "@/node/services/log";
 import {
   WorkspaceGoalService,
   type GoalLifecycleAnalyticsSink,
+  type WorkspaceGoalServiceOptions,
 } from "@/node/services/workspaceGoalService";
 import { MCPConfigService } from "@/node/services/mcpConfigService";
 import { MCPServerManager, type MCPServerManagerOptions } from "@/node/services/mcpServerManager";
@@ -41,6 +42,7 @@ export interface CoreServicesOptions {
   policyService?: PolicyService;
   telemetryService?: TelemetryService;
   analyticsService?: GoalLifecycleAnalyticsSink;
+  goalServiceOptions?: WorkspaceGoalServiceOptions;
   experimentsService?: ExperimentsService;
   sessionTimingService?: SessionTimingService;
   opResolver?: ExternalSecretResolver;
@@ -83,7 +85,8 @@ export function createCoreServices(opts: CoreServicesOptions): CoreServices {
     config,
     historyService,
     extensionMetadata,
-    opts.analyticsService
+    opts.analyticsService,
+    opts.goalServiceOptions
   );
 
   const aiService = new AIService(
diff --git a/src/node/services/workspaceGoalService.test.ts b/src/node/services/workspaceGoalService.test.ts
index 1860c05a20..763cdbd1a1 100644
--- a/src/node/services/workspaceGoalService.test.ts
+++ b/src/node/services/workspaceGoalService.test.ts
@@ -434,6 +434,44 @@ describe("WorkspaceGoalService", () => {
     );
   });
 
+  test("can suppress setGoal kickoff continuation for CLI-controlled kickoff", async () => {
+    service = new WorkspaceGoalService(config, historyService, extensionMetadata, analytics, {
+      suppressKickoffContinuation: true,
+    });
+    const dispatcher = new IdleDispatcher();
+    const execute = mock(() => Promise.resolve(true));
+    service.registerGoalContinuationConsumer(dispatcher, continuationBridge(execute));
+
+    await setGoalOk(service, { workspaceId, objective: "Wait for the CLI kickoff message" });
+    await dispatcher.requestDispatch(workspaceId, GOAL_CONTINUATION_IDLE_CONSUMER_NAME);
+
+    expect(execute).not.toHaveBeenCalled();
+  });
+
+  test("allows zero cooldown for immediate CLI-style continuations", async () => {
+    service = new WorkspaceGoalService(config, historyService, extensionMetadata, analytics, {
+      continuationCooldownMs: 0,
+    });
+    await setGoalOk(service, { workspaceId, objective: "Keep going without idle delay" });
+    const dispatcher = new IdleDispatcher();
+    const execute = mock(() => Promise.resolve(true));
+    service.registerGoalContinuationConsumer(dispatcher, continuationBridge(execute));
+
+    await service.requestContinuationAfterStreamEnd({
+      workspaceId,
+      sendOptions: { model: "openai:gpt-4o", agentId: "exec" },
+      streamEndedAtMs: 10_000,
+    });
+    await service.requestContinuationAfterStreamEnd({
+      workspaceId,
+      sendOptions: { model: "openai:gpt-4o", agentId: "exec" },
+      streamEndedAtMs: 10_001,
+    });
+    await dispatcher.requestDispatch(workspaceId, GOAL_CONTINUATION_IDLE_CONSUMER_NAME);
+
+    expect(execute).toHaveBeenCalledTimes(2);
+  });
+
   test("dispatches one budget-limit wrap-up after a continuation-origin stream exhausts the budget", async () => {
     const created = await setGoalOk(service, {
       workspaceId,
@@ -744,6 +782,39 @@ describe("WorkspaceGoalService", () => {
     });
   });
 
+  test("can allow budget-limit wrap-up after user-origin stream exhaustion", async () => {
+    service = new WorkspaceGoalService(config, historyService, extensionMetadata, analytics, {
+      allowUserOriginBudgetWrapup: true,
+    });
+    const created = await setGoalOk(service, {
+      workspaceId,
+      objective: "CLI owns over-budget kickoff",
+      budgetCents: 100,
+    });
+    const dispatcher = new IdleDispatcher();
+    const execute = mock(() => Promise.resolve(true));
+    service.registerGoalContinuationConsumer(dispatcher, continuationBridge(execute));
+
+    await service.recordStreamAccounting({
+      workspaceId,
+      costUsd: 1.25,
+      streamStartedAtMs: created.createdAtMs + 1,
+      streamOriginKind: "user",
+    });
+    await service.requestContinuationAfterStreamEnd({
+      workspaceId,
+      sendOptions: { model: "openai:gpt-4o", agentId: "exec" },
+      streamEndedAtMs: 20_000,
+    });
+    await dispatcher.requestDispatch(workspaceId, GOAL_CONTINUATION_IDLE_CONSUMER_NAME);
+
+    expect(execute).toHaveBeenCalledTimes(1);
+    expect(await service.getGoal(workspaceId)).toMatchObject({
+      status: "budget_limited",
+      budgetLimitInjectedForGoalId: created.goalId,
+    });
+  });
+
   test("recoverPendingDispatchAfterRestart re-arms a stranded budget_limited wrap-up", async () => {
     // Regression: Simulates a process
     // restart by:
diff --git a/src/node/services/workspaceGoalService.ts b/src/node/services/workspaceGoalService.ts
index ecaeab37bb..4f9c1c5bb1 100644
--- a/src/node/services/workspaceGoalService.ts
+++ b/src/node/services/workspaceGoalService.ts
@@ -329,8 +329,20 @@ function continuationSendOptions(sendOptions: SendMessageOptions): SendMessageOp
   return pickStartupRetrySendOptions(sendOptions) as SendMessageOptions;
 }
 
+export interface WorkspaceGoalServiceOptions {
+  /** Interactive continuation cooldown override in ms (>= 0); CLI goal runs use 0 to skip it. */
+  continuationCooldownMs?: number;
+  /** Allow CLI kickoff turns to receive the same budget-limit wrap-up as continuations. */
+  allowUserOriginBudgetWrapup?: boolean;
+  /** Prevent setGoal from queuing an automatic kickoff when the CLI sends its own message. */
+  suppressKickoffContinuation?: boolean;
+}
+
 export class WorkspaceGoalService {
   private readonly fileLocks = workspaceFileLocks;
+  private readonly continuationCooldownMs: number;
+  private readonly allowUserOriginBudgetWrapup: boolean;
+  private readonly suppressKickoffContinuation: boolean;
   private readonly pendingGoalMutations = new Map<string, PendingGoalMutation>();
   private readonly pendingGoalSnapshots = new Map<string, GoalSnapshot>();
 
@@ -359,8 +371,18 @@ export class WorkspaceGoalService {
     private readonly config: Config,
     private readonly historyService: HistoryService,
     private readonly extensionMetadata: ExtensionMetadataService,
-    private readonly analytics?: GoalLifecycleAnalyticsSink
-  ) {}
+    private readonly analytics?: GoalLifecycleAnalyticsSink,
+    options: WorkspaceGoalServiceOptions = {}
+  ) {
+    this.continuationCooldownMs =
+      options.continuationCooldownMs ?? DEFAULT_GOAL_CONTINUATION_COOLDOWN_MS;
+    this.allowUserOriginBudgetWrapup = options.allowUserOriginBudgetWrapup === true;
+    this.suppressKickoffContinuation = options.suppressKickoffContinuation === true;
+    assert(
+      Number.isFinite(this.continuationCooldownMs) && this.continuationCooldownMs >= 0,
+      "WorkspaceGoalService requires a non-negative continuation cooldown"
+    );
+  }
 
   setOnActivityChange(
     listener: (workspaceId: string, snapshot: WorkspaceActivitySnapshot) => void
@@ -1012,12 +1034,12 @@ export class WorkspaceGoalService {
     const lastContinuationFiredAtMs = goal.lastContinuationFiredAtMs ?? null;
     if (
       lastContinuationFiredAtMs != null &&
-      nowMs - lastContinuationFiredAtMs < DEFAULT_GOAL_CONTINUATION_COOLDOWN_MS
+      nowMs - lastContinuationFiredAtMs < this.continuationCooldownMs
     ) {
       return {
         eligible: false,
         reason: "cooldown",
-        deferUntilMs: lastContinuationFiredAtMs + DEFAULT_GOAL_CONTINUATION_COOLDOWN_MS,
+        deferUntilMs: lastContinuationFiredAtMs + this.continuationCooldownMs,
       };
     }
 
@@ -1115,7 +1137,7 @@ export class WorkspaceGoalService {
   }
 
   private isBudgetWrapupEligibleOrigin(originKind: GoalStreamOriginKind): boolean {
-    return originKind !== "user";
+    return this.allowUserOriginBudgetWrapup || originKind !== "user";
   }
 
   private async tryMarkBudgetLimitInjected(
@@ -1836,6 +1858,9 @@ export class WorkspaceGoalService {
   }
 
   private armKickoffContinuationIfIdle(workspaceId: string, goal: GoalRecordV1): void {
+    if (this.suppressKickoffContinuation) {
+      return;
+    }
     if (goal.status !== "active") {
       return;
     }