diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..babc70b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,98 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +env: + RUSTFLAGS: "-D warnings" + +jobs: + check: + name: CI checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 + + - name: Rust checks + run: cargo fmt --check && cargo clippy -- -D warnings && cargo build && cargo test + + build: + name: Build ${{ matrix.target }} + needs: check + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - target: aarch64-apple-darwin + os: macos-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - uses: Swatinem/rust-cache@v2 + + - name: Build release binary + run: cargo build --release --target ${{ matrix.target }} + + - name: Package artifact + shell: bash + run: | + tag="${GITHUB_REF#refs/tags/}" + name="bugatti-${tag}-${{ matrix.target }}" + mkdir -p "dist/${name}" + cp "target/${{ matrix.target }}/release/bugatti" "dist/${name}/" + cd dist + tar czf "${name}.tar.gz" "${name}" + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: bugatti-${{ matrix.target }} + path: dist/*.tar.gz + + release: + name: Create Release + needs: build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + merge-multiple: true + + - name: Generate checksums + run: | + cd artifacts + sha256sum *.tar.gz > checksums-sha256.txt + + - name: Create GitHub Release + env: + GH_TOKEN: ${{ github.token }} + run: | + tag="${GITHUB_REF#refs/tags/}" + gh release create "${tag}" \ + --title "${tag}" 
\ + --generate-notes \ + --draft \ + artifacts/*.tar.gz \ + artifacts/checksums-sha256.txt diff --git a/Cargo.lock b/Cargo.lock index f266bd6..e26fe2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,7 +90,7 @@ dependencies = [ [[package]] name = "bugatti" -version = "0.1.0" +version = "0.2.0" dependencies = [ "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 9f7947b..65fc807 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bugatti" -version = "0.1.0" +version = "0.2.0" edition = "2021" description = "A CLI for plain-English, agent-assisted local application verification using *.test.toml files" diff --git a/install.sh b/install.sh index d8ebb33..e530417 100755 --- a/install.sh +++ b/install.sh @@ -1,11 +1,12 @@ #!/bin/sh # install.sh — Bugatti CLI installer. # -# Builds from source (requires Rust/Cargo) and installs the binary. +# When run via curl|sh, downloads the pre-built binary from GitHub Releases. +# When run from a repo checkout, builds from source (requires Rust/Cargo). # # Usage: # curl -sSf https://raw.githubusercontent.com/codesoda/bugatti-cli/main/install.sh | sh -# ./install.sh [options] # from a repo checkout +# ./install.sh [options] # from a repo checkout (builds from source) # # Options: # --skip-symlink Skip creating ~/.local/bin symlink @@ -142,41 +143,64 @@ trap cleanup EXIT INT TERM INSTALLED_BINARY="" SOURCE_ROOT="" -# --- Resolve source tree --- +# --- Detect architecture --- -resolve_source_root() { - # If invoked from a repo checkout, use it directly - script_dir="$(cd "$(dirname "$0")" && pwd)" - if [ -f "$script_dir/Cargo.toml" ] && [ -d "$script_dir/src" ]; then - SOURCE_ROOT="$script_dir" - return 0 - fi +detect_target() { + os="$(uname -s)" + arch="$(uname -m)" + + case "$os" in + Darwin) ;; + *) die "Pre-built binaries are only available for macOS (got $os). Build from a clone instead." 
;; + esac - # Download source archive + case "$arch" in + arm64|aarch64) echo "aarch64-apple-darwin" ;; + *) die "Pre-built binaries are only available for arm64 (got $arch). Build from a clone instead." ;; + esac +} + +# --- Install from GitHub release --- + +install_from_release() { if ! command -v curl >/dev/null 2>&1; then die "curl is required for remote install" fi - info "Downloading source from GitHub..." - TMP_DIR="$(mktemp -d)" - archive_url="https://github.com/$REPO_OWNER/$REPO_NAME/archive/refs/heads/$REPO_REF.tar.gz" + target="$(detect_target)" - if ! curl -sSL "$archive_url" | tar xz -C "$TMP_DIR" 2>/dev/null; then - die "Failed to download source from $archive_url" + header "Fetching latest release" + + # Get the latest release tag + latest_url="https://api.github.com/repos/$REPO_OWNER/$REPO_NAME/releases/latest" + tag="$(curl -sSf "$latest_url" | grep '"tag_name"' | head -1 | sed 's/.*"tag_name": *"\([^"]*\)".*/\1/')" + if [ -z "$tag" ]; then + die "Could not determine latest release — check https://github.com/$REPO_OWNER/$REPO_NAME/releases" + fi + ok_detail "Release" "$tag" + + asset_name="bugatti-${tag}-${target}.tar.gz" + asset_url="https://github.com/$REPO_OWNER/$REPO_NAME/releases/download/${tag}/${asset_name}" + + TMP_DIR="$(mktemp -d)" + info "Downloading $asset_name..." + if ! curl -sSfL "$asset_url" -o "$TMP_DIR/$asset_name"; then + die "Failed to download $asset_url" fi + ok "Downloaded" - extracted="$TMP_DIR/$REPO_NAME-$REPO_REF" - if [ ! -f "$extracted/Cargo.toml" ]; then - die "Downloaded archive does not contain expected source tree" + tar xzf "$TMP_DIR/$asset_name" -C "$TMP_DIR" + downloaded_binary="$TMP_DIR/bugatti-${tag}-${target}/bugatti" + if [ ! 
-f "$downloaded_binary" ]; then + die "Archive does not contain expected binary" fi - SOURCE_ROOT="$extracted" + install_binary "$downloaded_binary" } -# --- Build from source --- +# --- Build from local source --- build_from_source() { - resolve_source_root ok_detail "Source tree" "$SOURCE_ROOT" header "Checking prerequisites" @@ -196,8 +220,13 @@ build_from_source() { fi ok_detail "Built" "$built_binary" + install_binary "$built_binary" +} - # Install to ~/./bin — use symlink for local checkouts, copy otherwise +# --- Install binary to BUGATTI_HOME --- + +install_binary() { + src_binary="$1" bugatti_home="${BUGATTI_HOME:-$HOME/.bugatti}" bin_dir="$bugatti_home/bin" mkdir -p "$bin_dir" @@ -209,7 +238,7 @@ build_from_source() { rm "$target_path" fi - cp "$built_binary" "$target_path" + cp "$src_binary" "$target_path" chmod +x "$target_path" ok_detail "Installed" "$target_path" @@ -277,7 +306,14 @@ main() { dim "━━━━━━━━━━━━━━━━━" printf '\n' - build_from_source + # If running from a repo checkout, build locally; otherwise grab the release binary + script_dir="$(cd "$(dirname "$0")" && pwd)" + if [ -f "$script_dir/Cargo.toml" ] && [ -d "$script_dir/src" ]; then + SOURCE_ROOT="$script_dir" + build_from_source + else + install_from_release + fi if [ "$SKIP_SYMLINK" = 0 ]; then ensure_local_bin_symlink || true diff --git a/src/command.rs b/src/command.rs index 9db90b5..0cbe223 100644 --- a/src/command.rs +++ b/src/command.rs @@ -196,6 +196,9 @@ fn execute_short_lived( if !output.status.success() { tracing::error!(command = name, exit_code = ?exit_code, "short-lived command failed"); + // Print last lines of output so the user can see what went wrong + print_output_tail("stderr", &output.stderr); + print_output_tail("stdout", &output.stdout); return Err(CommandError::NonZeroExit { name: name.to_string(), exit_code, @@ -212,6 +215,20 @@ fn execute_short_lived( }) } +/// Print the last non-empty lines of command output, prefixed with a label. 
+fn print_output_tail(label: &str, output: &[u8]) { + let text = String::from_utf8_lossy(output); + let lines: Vec<&str> = text.lines().filter(|l| !l.trim().is_empty()).collect(); + if lines.is_empty() { + return; + } + let tail: Vec<&str> = lines.into_iter().rev().take(10).collect::<Vec<_>>().into_iter().rev().collect(); + println!(" {label}:"); + for line in tail { + println!(" {line}"); + } +} + /// Default timeout for readiness checks (30 seconds). const DEFAULT_READINESS_TIMEOUT: Duration = Duration::from_secs(30); @@ -266,19 +283,23 @@ pub fn spawn_long_lived_commands( if skip_cmds.contains(name) { println!("SKIP ....... {name} (long-lived)"); // Readiness checks still run for skipped commands unless explicitly disabled - if let Some(ref readiness_url) = def.readiness_url { + let urls = def.effective_readiness_urls(); + if !urls.is_empty() { if skip_readiness.contains(name) { println!("SKIP ....... {name} readiness check (--skip-readiness)"); } else { - println!("WAIT ....... {name} (skipped): polling {readiness_url}"); - if let Err(e) = poll_readiness(readiness_url, DEFAULT_READINESS_TIMEOUT) { - println!("FAIL ....... {name} (skipped): readiness check failed"); - teardown_processes(&mut tracked); - return Err(CommandError::ReadinessFailed { - name: name.to_string(), - url: readiness_url.clone(), - message: e, - }); + let timeout = readiness_timeout(def); + for url in &urls { + println!("WAIT ....... {name} (skipped): polling {url}"); + if let Err(e) = poll_readiness(url, timeout) { + println!("FAIL ....... {name} (skipped): readiness check failed"); + teardown_processes(&mut tracked); + return Err(CommandError::ReadinessFailed { + name: name.to_string(), + url: url.to_string(), + message: e, + }); + } } println!("READY ...... {name} (skipped)"); } @@ -325,17 +346,21 @@ pub fn spawn_long_lived_commands( tracked.push(process); // Check readiness if configured - if let Some(ref readiness_url) = def.readiness_url { - println!("WAIT ....... 
{name}: polling {readiness_url}"); - if let Err(e) = poll_readiness(readiness_url, DEFAULT_READINESS_TIMEOUT) { - // Readiness failed - tear down what we've started - println!("FAIL ....... {name}: readiness check failed"); - teardown_processes(&mut tracked); - return Err(CommandError::ReadinessFailed { - name: name.to_string(), - url: readiness_url.clone(), - message: e, - }); + let urls = def.effective_readiness_urls(); + if !urls.is_empty() { + let timeout = readiness_timeout(def); + for url in &urls { + println!("WAIT ....... {name}: polling {url}"); + if let Err(e) = poll_readiness(url, timeout) { + // Readiness failed - tear down what we've started + println!("FAIL ....... {name}: readiness check failed"); + teardown_processes(&mut tracked); + return Err(CommandError::ReadinessFailed { + name: name.to_string(), + url: url.to_string(), + message: e, + }); + } } println!("READY ...... {name}"); } @@ -344,6 +369,13 @@ pub fn spawn_long_lived_commands( Ok(tracked) } +/// Compute the readiness timeout for a command, using the per-command override or the default. +fn readiness_timeout(def: &CommandDef) -> Duration { + def.readiness_timeout_secs + .map(Duration::from_secs) + .unwrap_or(DEFAULT_READINESS_TIMEOUT) +} + /// Poll a readiness URL until it responds with a success status or timeout. fn poll_readiness(url: &str, timeout: Duration) -> Result<(), String> { tracing::info!( @@ -518,6 +550,8 @@ mod tests { kind, cmd: cmd.to_string(), readiness_url: readiness_url.map(String::from), + readiness_urls: Vec::new(), + readiness_timeout_secs: None, }, ); } diff --git a/src/config.rs b/src/config.rs index bd37da2..199d41f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -54,8 +54,28 @@ impl Default for ProviderConfig { pub struct CommandDef { pub kind: CommandKind, pub cmd: String, + /// Single readiness URL (convenience shorthand — mutually exclusive with `readiness_urls`). 
#[serde(default)] pub readiness_url: Option<String>, +    /// Multiple readiness URLs to poll before the command is considered ready. + #[serde(default)] + pub readiness_urls: Vec<String>, + /// Timeout in seconds for readiness polling (default: 30). + #[serde(default)] + pub readiness_timeout_secs: Option<u64>, +} + +impl CommandDef { + /// Return the effective list of readiness URLs, merging `readiness_url` and `readiness_urls`. + pub fn effective_readiness_urls(&self) -> Vec<&str> { + let mut urls: Vec<&str> = self.readiness_urls.iter().map(|s| s.as_str()).collect(); + if let Some(ref url) = self.readiness_url { + if !urls.iter().any(|u| *u == url.as_str()) { + urls.insert(0, url.as_str()); + } + } + urls + } } /// Whether a command is short-lived (run to completion) or long-lived (background process). @@ -338,6 +358,7 @@ readiness_url = "http://localhost:3000/health" include_path: None, include_glob: None, step_timeout_secs: None, + skip: false, }], }; diff --git a/src/executor.rs b/src/executor.rs index 699ba3c..a3ff770 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -237,6 +237,7 @@ pub struct BootstrapConfig<'a> { pub test_file: &'a str, pub extra_system_prompt: Option<&'a str>, pub base_url: Option<&'a str>, + pub artifact_dir: &'a ArtifactDir, } /// Build the bootstrap message content sent to the provider at session start. 
@@ -286,6 +287,14 @@ pub fn build_bootstrap_content( content.push_str("\nAll URLs in step instructions are relative to the Base URL unless a full URL (with host) is provided.\n"); } + // Artifact directories + content.push_str("\n## Artifacts\n\n"); + content.push_str("Save any files produced during the test run to these directories:\n\n"); + content.push_str(&format!("- **Root**: `{}`\n", config.artifact_dir.root.display())); + content.push_str(&format!("- **Screenshots**: `{}`\n", config.artifact_dir.screenshots.display())); + content.push_str(&format!("- **Logs**: `{}`\n", config.artifact_dir.logs.display())); + content.push_str("\nScreenshots, videos, downloaded files, and any other evidence should be saved to the appropriate directory above.\n"); + content } @@ -385,16 +394,39 @@ pub fn execute_steps( // Print step begin let instruction_summary = truncate_instruction(&step.instruction, 60); + let display_source = std::env::current_dir() + .ok() + .and_then(|cwd| step.source_file.strip_prefix(&cwd).ok().map(|p| p.display().to_string())) + .unwrap_or_else(|| step.source_file.display().to_string()); + + // Handle skipped steps + if step.skip { + println!( + "SKIP {}/{} ... {} (from {})", + step.step_id + 1, + total_steps, + instruction_summary, + display_source + ); + outcomes.push(StepOutcome { + step_id: step.step_id, + instruction: step.instruction.clone(), + source_file: step.source_file.clone(), + result: StepResult::Verdict(StepVerdict::Ok), + transcript: String::new(), + log_events: vec![], + evidence_refs: vec![], + duration: Duration::ZERO, + }); + continue; + } + tracing::info!( step_id = step.step_id, total = total_steps, source = %step.source_file.display(), "step execution begin" ); - let display_source = std::env::current_dir() - .ok() - .and_then(|cwd| step.source_file.strip_prefix(&cwd).ok().map(|p| p.display().to_string())) - .unwrap_or_else(|| step.source_file.display().to_string()); println!( "STEP {}/{} ... 
{} (from {})", step.step_id + 1, @@ -880,6 +912,7 @@ mod tests { source_step_index: 0, parent_chain: vec![], step_timeout_secs: None, + skip: false, }, ExpandedStep { step_id: 1, @@ -888,6 +921,7 @@ mod tests { source_step_index: 1, parent_chain: vec![], step_timeout_secs: None, + skip: false, }, ] } @@ -1522,11 +1556,13 @@ mod tests { #[test] fn build_bootstrap_content_includes_result_contract() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Login test", test_file: "tests/login.test.toml", extra_system_prompt: None, base_url: None, + artifact_dir: &artifact_dir, }; let run_id = RunId("run-1".to_string()); let session_id = SessionId("sess-1".to_string()); @@ -1540,11 +1576,13 @@ mod tests { #[test] fn build_bootstrap_content_includes_test_metadata() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Login test", test_file: "tests/login.test.toml", extra_system_prompt: None, base_url: None, + artifact_dir: &artifact_dir, }; let run_id = RunId("run-abc".to_string()); let session_id = SessionId("sess-xyz".to_string()); @@ -1559,11 +1597,13 @@ mod tests { #[test] fn build_bootstrap_content_includes_extra_system_prompt() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Test", test_file: "test.test.toml", extra_system_prompt: Some("Be concise and thorough"), base_url: None, + artifact_dir: &artifact_dir, }; let run_id = RunId("run-1".to_string()); let session_id = SessionId("sess-1".to_string()); @@ -1578,11 +1618,13 @@ mod tests { #[test] fn build_bootstrap_content_omits_prompt_when_none() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Test", test_file: "test.test.toml", extra_system_prompt: None, base_url: None, + artifact_dir: &artifact_dir, }; let run_id = RunId("run-1".to_string()); let session_id = SessionId("sess-1".to_string()); @@ -1594,11 +1636,13 @@ mod tests { #[test] fn 
build_bootstrap_content_includes_base_url() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Test", test_file: "test.test.toml", extra_system_prompt: None, base_url: Some("http://localhost:3000"), + artifact_dir: &artifact_dir, }; let run_id = RunId("run-1".to_string()); let session_id = SessionId("sess-1".to_string()); @@ -1608,11 +1652,13 @@ mod tests { #[test] fn build_bootstrap_content_omits_base_url_when_none() { + let (_tmp, artifact_dir) = test_artifact_dir(); let config = BootstrapConfig { test_name: "Test", test_file: "test.test.toml", extra_system_prompt: None, base_url: None, + artifact_dir: &artifact_dir, }; let run_id = RunId("run-1".to_string()); let session_id = SessionId("sess-1".to_string()); @@ -1636,6 +1682,7 @@ mod tests { test_file: "test.test.toml", extra_system_prompt: None, base_url: None, + artifact_dir: &artifact_dir, }; let _outcome = execute_steps( diff --git a/src/exit_code.rs b/src/exit_code.rs index f442249..bf1e8c9 100644 --- a/src/exit_code.rs +++ b/src/exit_code.rs @@ -10,6 +10,7 @@ //! | 3 | Provider startup or readiness check failure | //! | 4 | Timeout during step execution | //! | 5 | Run was interrupted (Ctrl+C / SIGINT) | +//! | 6 | Setup command failed (short-lived command exited non-zero) | /// All steps passed (OK or WARN only). pub const EXIT_OK: i32 = 0; @@ -29,6 +30,9 @@ pub const EXIT_TIMEOUT: i32 = 4; /// Run was interrupted (Ctrl+C / SIGINT). pub const EXIT_INTERRUPTED: i32 = 5; +/// A setup command (short-lived) failed during the setup phase. +pub const EXIT_SETUP_ERROR: i32 = 6; + use crate::executor::{RunOutcome, StepResult, StepVerdict}; /// Compute the exit code for a single completed run. 
@@ -103,6 +107,7 @@ pub fn describe_exit_code(code: i32) -> &'static str { EXIT_PROVIDER_ERROR => "provider or readiness failure", EXIT_TIMEOUT => "step execution timeout", EXIT_INTERRUPTED => "run interrupted", + EXIT_SETUP_ERROR => "setup command failed", _ => "unknown exit code", } } diff --git a/src/expand.rs b/src/expand.rs index ef021a8..d8326b9 100644 --- a/src/expand.rs +++ b/src/expand.rs @@ -17,6 +17,8 @@ pub struct ExpandedStep { pub parent_chain: Vec, /// Optional per-step timeout override in seconds. pub step_timeout_secs: Option, + /// If true, this step is skipped during execution. + pub skip: bool, } /// Error type for step expansion. @@ -107,6 +109,7 @@ fn expand_steps_inner( source_step_index: i, parent_chain: parent_chain.to_vec(), step_timeout_secs: step.step_timeout_secs, + skip: step.skip, }); *step_id += 1; } else if let Some(ref include_path) = step.include_path { diff --git a/src/main.rs b/src/main.rs index c9d3231..2467e6f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,8 @@ use bugatti::diagnostics; use bugatti::discovery::{discover_root_tests, DiscoveredTest}; use bugatti::executor; use bugatti::exit_code::{ - self, EXIT_CONFIG_ERROR, EXIT_INTERRUPTED, EXIT_OK, EXIT_PROVIDER_ERROR, EXIT_STEP_ERROR, + self, EXIT_CONFIG_ERROR, EXIT_INTERRUPTED, EXIT_OK, EXIT_PROVIDER_ERROR, EXIT_SETUP_ERROR, + EXIT_STEP_ERROR, }; use bugatti::expand; use bugatti::provider::AgentSession; @@ -343,7 +344,7 @@ fn run_test_with_artifacts(ctx: &PipelineContext, steps: Vec, /// Optional per-step timeout override in seconds. pub step_timeout_secs: Option, + /// If true, this step is skipped during execution (counts as passed). + #[serde(default)] + pub skip: bool, } /// Error type for test file parsing.