From 4cc1676426ef7a8c9f15556508ccea6c3925c201 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Tue, 14 Apr 2026 19:19:20 -0700 Subject: [PATCH 01/25] emulator pull progress --- packages/stack-cli/src/commands/emulator.ts | 95 +++++++++++++++++++-- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index d52463b15b..9544b0c890 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,8 +1,10 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; -import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; +import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; +import { Readable } from "stream"; +import { pipeline } from "stream/promises"; import { fileURLToPath } from "url"; import { CliError } from "../lib/errors.js"; @@ -145,7 +147,7 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`); if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); - pullRelease(arch); + await pullRelease(arch); } await runEmulator("start", { EMULATOR_ARCH: arch }); } @@ -156,7 +158,7 @@ function resolveArch(raw?: string): "arm64" | "amd64" { throw new CliError(`Invalid architecture: ${raw ?? process.arch}. Expected arm64 or amd64.`); } -function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: string; tag?: string } = {}) { +async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) { const repo = opts.repo ?? "stack-auth/stack-auth"; const branch = opts.branch ?? "dev"; const tag = opts.tag ?? 
`emulator-${branch}-latest`; @@ -168,15 +170,98 @@ function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: st console.log(`Pulling ${asset} from release ${tag}...`); try { - execFileSync("gh", ["release", "download", tag, "--repo", repo, "--pattern", asset, "--output", tmpDest, "--clobber"], { stdio: "inherit" }); + const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { + assets: { name: string, apiUrl: string, size: number }[], + }; + const match = assets.assets.find((a) => a.name === asset); + if (!match) { + throw new CliError(`Asset ${asset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`); + } + const token = gh(["auth", "token"]); + await downloadWithProgress(match.apiUrl, { + Authorization: `Bearer ${token}`, + Accept: "application/octet-stream", + }, tmpDest, match.size); } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); + if (err instanceof CliError) throw err; throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`); } renameSync(tmpDest, dest); console.log(`Downloaded: ${dest}`); } +async function downloadWithProgress(url: string, headers: Record<string, string>, dest: string, totalBytes?: number): Promise<void> { + const res = await fetch(url, { headers, redirect: "follow" }); + if (!res.ok || !res.body) { + throw new CliError(`Download failed (${res.status} ${res.statusText}): ${url}`); + } + const total = totalBytes ?? 
(Number(res.headers.get("content-length")) || 0); + const isTty = Boolean(process.stderr.isTTY); + const startedAt = Date.now(); + let downloaded = 0; + let lastRender = 0; + + const render = (final: boolean) => { + const now = Date.now(); + if (!final && now - lastRender < 100) return; + lastRender = now; + const elapsed = Math.max(0.001, (now - startedAt) / 1000); + const speed = downloaded / elapsed; + const line = renderProgressLine(downloaded, total, speed); + if (isTty) { + process.stderr.write(`\r\x1b[2K${line}`); + } else if (final) { + process.stderr.write(`${line}\n`); + } + }; + + const body = Readable.fromWeb(res.body as Parameters<typeof Readable.fromWeb>[0]); + body.on("data", (chunk: Buffer) => { + downloaded += chunk.byteLength; + render(false); + }); + await pipeline(body, createWriteStream(dest)); + render(true); + if (isTty) process.stderr.write("\n"); +} + +function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { + const barWidth = 30; + const pct = total > 0 ? Math.min(100, (downloaded / total) * 100) : 0; + const filled = total > 0 ? Math.round((downloaded / total) * barWidth) : 0; + const bar = "█".repeat(filled) + "░".repeat(Math.max(0, barWidth - filled)); + const pctStr = total > 0 ? `${pct.toFixed(1).padStart(5)}%` : " ? "; + const sizeStr = total > 0 ? `${formatBytes(downloaded)}/${formatBytes(total)}` : formatBytes(downloaded); + const speedStr = `${formatBytes(bytesPerSec)}/s`; + const etaStr = total > 0 && bytesPerSec > 0 ? ` eta ${formatDuration((total - downloaded) / bytesPerSec)}` : ""; + return ` [${bar}] ${pctStr} ${sizeStr} ${speedStr}${etaStr}`; +} + +function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes < 0) return "?"; + const units = ["B", "KB", "MB", "GB", "TB"]; + let v = bytes; + let i = 0; + while (v >= 1024 && i < units.length - 1) { + v /= 1024; + i++; + } + return `${v.toFixed(v < 10 && i > 0 ? 
1 : 0)} ${units[i]}`; +} + +function formatDuration(seconds: number): string { + if (!Number.isFinite(seconds) || seconds < 0) return "?"; + const s = Math.round(seconds); + if (s < 60) return `${s}s`; + const m = Math.floor(s / 60); + const rs = s % 60; + if (m < 60) return `${m}m${rs.toString().padStart(2, "0")}s`; + const h = Math.floor(m / 60); + const rm = m % 60; + return `${h}h${rm.toString().padStart(2, "0")}m`; +} + export function registerEmulatorCommand(program: Command) { const emulator = program.command("emulator").description("Manage the QEMU local emulator"); @@ -216,7 +301,7 @@ export function registerEmulatorCommand(program: Command) { if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); } else { - pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); + await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } }); From a65022b8f745afd175bf0473868fdfb9b68470a8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 11:49:52 -0700 Subject: [PATCH 02/25] emulator fast-start via VM snapshot + live secret rotation Ships a compressed RAM/device snapshot (stack-emulator-.savevm.zst) alongside the qcow2. `emulator start` resumes from it and rotates the per-install secrets in place, taking cold-boot from 30-120s to ~6-7s. Build phase adds a STACKCFG runtime ISO so stack.service can boot during image creation, starts qemu-guest-agent so its virtio-serial port stays open in the snapshot, then stop+migrate file:+quit via QMP. Runtime sends fresh secrets through QGA guest-exec input-data, which pipes them to trigger-fast-rotate and rotate-secrets inside the container: targeted sed on the placeholder PCK in built JS, UPDATE on the internal ApiKeySet, supervisorctl restart stack-app + cron-jobs. Placeholder hex values are baked in instead of random keys under STACK_EMULATOR_BUILD_SNAPSHOT=1 so no real secret ships in the snapshot. 
Device topology and SMP must match at capture and resume; runtime adds phantom seed/bundle drives and pins SMP=4. Cold-boot fallback kicks in automatically when the snapshot is missing, corrupt, or incompatible. supervisord.conf now uses stopasgroup/killasgroup for stack-app and cron-jobs so supervisor restart actually kills the Node children (they were keeping their port bindings and breaking rotation). --- .gitignore | 3 + docker/local-emulator/Dockerfile | 3 +- docker/local-emulator/entrypoint.sh | 8 +- docker/local-emulator/qemu/build-image.sh | 264 +++++++++++- .../qemu/cloud-init/emulator/user-data | 170 +++++++- docker/local-emulator/qemu/run-emulator.sh | 375 ++++++++++++++++-- docker/local-emulator/rotate-secrets.sh | 104 +++++ docker/local-emulator/run-cron-jobs.sh | 8 + docker/local-emulator/supervisord.conf | 21 + docker/server/entrypoint.sh | 87 ++-- packages/stack-cli/src/commands/emulator.ts | 55 ++- 11 files changed, 987 insertions(+), 111 deletions(-) create mode 100644 docker/local-emulator/rotate-secrets.sh diff --git a/.gitignore b/.gitignore index eab9ce0b48..8fa5c69404 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,6 @@ packages/stack/* !packages/react/package.json !packages/next/package.json !packages/stack/package.json + +# claude code +.claude/scheduled_tasks.lock diff --git a/docker/local-emulator/Dockerfile b/docker/local-emulator/Dockerfile index 56deae7882..138270b405 100644 --- a/docker/local-emulator/Dockerfile +++ b/docker/local-emulator/Dockerfile @@ -262,10 +262,11 @@ COPY docker/local-emulator/run-cron-jobs.sh /run-cron-jobs.sh COPY docker/local-emulator/entrypoint.sh /entrypoint.sh COPY docker/local-emulator/init-services.sh /init-services.sh COPY docker/local-emulator/start-app.sh /start-app.sh +COPY docker/local-emulator/rotate-secrets.sh /usr/local/bin/rotate-secrets COPY docker/local-emulator/clickhouse-config.xml /etc/clickhouse-server/config.xml COPY docker/local-emulator/clickhouse-users.xml 
/etc/clickhouse-server/users.xml COPY docker/server/entrypoint.sh /app-entrypoint.sh -RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh +RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh /usr/local/bin/rotate-secrets # PostgreSQL: 5432, Redis: 6379, Inbucket: 2500/9001/1100, # Svix: 8071, ClickHouse: 8123/9009, MinIO: 9090, QStash: 8080 diff --git a/docker/local-emulator/entrypoint.sh b/docker/local-emulator/entrypoint.sh index 562cb67955..58157c7914 100644 --- a/docker/local-emulator/entrypoint.sh +++ b/docker/local-emulator/entrypoint.sh @@ -33,6 +33,12 @@ fi # baked-in mock value from .env.development to be a usable credential against # a running emulator. Overriding here propagates to both the backend and the # run-cron-jobs.sh loop via supervisord's inherited environment. -export CRON_SECRET="$(openssl rand -hex 32)" +# +# In snapshot-build mode the VM supplies a deterministic placeholder via the +# --env-file so the baked snapshot doesn't contain a real secret; on resume, +# /usr/local/bin/rotate-secrets swaps in a fresh per-install value. +if [ -z "${CRON_SECRET:-}" ]; then + export CRON_SECRET="$(openssl rand -hex 32)" +fi exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index f4d91771b7..0babc5e2b1 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -12,9 +12,22 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" DEBIAN_VERSION="${DEBIAN_VERSION:-13}" DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}" RAM="${EMULATOR_BUILD_RAM:-4096}" -CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" +# Snapshot mode pins SMP to a fixed value so the runtime QEMU command (which +# uses EMULATOR_CPUS, default 4) can match the source device topology — RAM +# migration replay requires identical vCPU count. +if [ "${EMULATOR_BUILD_SNAPSHOT:-1}" = "1" ]; then + CPUS="${EMULATOR_BUILD_CPUS:-4}" +else + CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" +fi PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" +# Snapshot build mode: bring the VM to a fully-warm state (backend + dashboard +# responding), then capture RAM/device state via QMP so that `emulator start` +# can -incoming from it and return in ~3-8s. Enabled by default; set +# EMULATOR_BUILD_SNAPSHOT=0 to fall back to the legacy "shutdown after +# provisioning" flow. +EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" RED='\033[0;31m' GREEN='\033[0;32m' @@ -51,6 +64,12 @@ check_deps() { command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + for cmd in socat zstd; do + command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") + done + fi + if ! command -v mkisofs >/dev/null 2>&1 && ! command -v genisoimage >/dev/null 2>&1 && ! command -v hdiutil >/dev/null 2>&1; then missing+=("mkisofs/genisoimage/hdiutil") fi @@ -231,6 +250,116 @@ persist_provision_logs() { cp "$provision_log" "$IMAGE_DIR/provision-emulator-${arch}.progress.log" 2>/dev/null || true } +# Open a persistent QMP session on the monitor socket, negotiate capabilities, +# run a series of commands, and close. Commands are read from stdin (one JSON +# object per line); responses are written to stdout. 
Uses socat's bidirectional +# pipe so we can interleave request/response in one connection — QMP requires +# qmp_capabilities to come first and keeps state across commands. +qmp_session() { + local sock="$1" + socat -t30 - "UNIX-CONNECT:${sock}" +} + +# Drive the snapshot capture over QMP: +# 1. qmp_capabilities — exit negotiation mode. +# 2. stop — pause the VM so no more disk writes happen. +# 3. migrate to exec:zstd > — streams RAM/device state out. +# 4. Poll query-migrate until status=completed (or failed). +# 5. quit — terminate QEMU cleanly. +capture_vm_state() { + local sock="$1" + local guest_path="$2" + + if [ ! -S "$sock" ]; then + err "QMP monitor socket missing: $sock" + return 1 + fi + + log " QMP: stopping VM..." + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"stop"}' + } | qmp_session "$sock" >/dev/null || { + err "QMP stop failed" + return 1 + } + + log " QMP: migrating RAM state to ${guest_path}..." + # Use file: migration (native QEMU) instead of exec: to avoid relying on a + # spawned shell finding zstd in PATH. We compress as a separate host step + # after migrate completes. + local migrate_cmd + migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") + local migrate_resp + migrate_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$migrate_cmd" + } | qmp_session "$sock") || { + err "QMP migrate failed" + return 1 + } + if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP migrate returned error: $migrate_resp" + return 1 + fi + + # Poll migration status. Migration runs in the background after the + # migrate command returns; we watch for "completed" or "failed". 
+ local migrate_timeout=600 + local waited=0 + local last_heartbeat=0 + while [ "$waited" -lt "$migrate_timeout" ]; do + local status_line status + status_line=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' + } | qmp_session "$sock" 2>/dev/null || true) + status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')" + case "$status" in + completed) + log " QMP: migrate completed (${waited}s)" + break + ;; + failed|cancelled) + err " QMP: migrate ended with status=$status" + err " QMP response: $status_line" + return 1 + ;; + active|setup|device|"") + # still running + if [ "$((waited - last_heartbeat))" -ge 30 ]; then + local transferred + transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})" + last_heartbeat=$waited + fi + ;; + *) + log " QMP: migrate status=$status (${waited}s)" + ;; + esac + sleep 2 + waited=$((waited + 2)) + done + + if [ "$waited" -ge "$migrate_timeout" ]; then + err "QMP migrate timed out after ${migrate_timeout}s" + err "Last query-migrate response: $({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' + } | qmp_session "$sock" 2>/dev/null || true)" + return 1 + fi + + log " QMP: quitting VM..." 
+ { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"quit"}' + } | qmp_session "$sock" >/dev/null || true + + return 0 +} + build_one() { local arch="$1" local base_img="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2" @@ -245,7 +374,9 @@ build_one() { local tmp_img="$tmp_dir/disk.qcow2" local seed_iso="$tmp_dir/seed.iso" local bundle_iso="$tmp_dir/bundle.iso" + local runtime_iso="$tmp_dir/runtime.iso" local bundle_dir="$tmp_dir/bundle" + local runtime_cfg_dir="$tmp_dir/runtime" local serial_log="$tmp_dir/serial.log" local provision_log="$tmp_dir/provision.log" local pidfile="$tmp_dir/qemu.pid" @@ -269,16 +400,64 @@ build_one() { mkdir -p "$bundle_dir" cp "$bundle_tgz" "$bundle_dir/img.tgz" cp "$BUILD_ENV_FILE" "$bundle_dir/build.env" + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # Guest reads this flag to use placeholder secrets and to wait at the end + # of provision-build for the host to snapshot the RAM state. + printf 'STACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env" + fi # Tell the guest which arch it's being built for so cross-arch (TCG) builds # can skip the smoke test, which isn't reliable under software emulation. printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env" make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir" + # render-stack-env (inside the guest) mounts a STACKCFG disk containing + # runtime.env + base.env. At runtime the host-side run-emulator.sh builds + # this ISO; at build time stack.service also starts the container, so we + # must provide the same shape here. Values mirror the defaults the runtime + # would supply — port-prefix 81 and matching host-port numbers (unused at + # build time since nothing is port-forwarded, but render-stack-env embeds + # them into /run/stack-auth/local-emulator.env). 
+ mkdir -p "$runtime_cfg_dir" + { + printf 'STACK_EMULATOR_PORT_PREFIX=81\n' + printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' + printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' + printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' + printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' + printf 'STACK_EMULATOR_VM_DIR_HOST=\n' + } > "$runtime_cfg_dir/runtime.env" + cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" + make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + : > "$serial_log" : > "$provision_log" qemu_base="$(qemu_cmd_prefix_for_arch "$arch")" log "QEMU command prefix (${arch}): $qemu_base" + local monitor_sock="$tmp_dir/monitor.sock" + local qga_sock="$tmp_dir/qga.sock" + local snapshot_args=() + local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec + # inside the guest post-resume (only needed at runtime but harmless here). + # STACKCFG runtime ISO lets stack.service start during the build — same + # disk shape render-stack-env expects at runtime. + snapshot_args=( + -chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + ) + # QEMU disallows migration when virtfs is mounted in the guest — virtfs + # has guest-side state (open handles, mount table) that isn't migratable. + # Drop the host fs mount in snapshot mode; STACK_SERVICES_READY still + # arrives on the serial log so contains_provision_marker can detect it. 
+ virtfs_args=() + fi + # shellcheck disable=SC2086 $qemu_base \ -boot order=c \ @@ -289,16 +468,21 @@ build_one() { -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ - -virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none" \ + "${virtfs_args[@]}" \ + "${snapshot_args[@]}" \ -serial "file:$serial_log" \ -display none \ -daemonize \ -pidfile "$pidfile" pid="$(cat "$pidfile")" + local ready_marker="STACK_CLOUD_INIT_DONE" + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + ready_marker="STACK_SERVICES_READY" + fi elapsed=0 while [ "$elapsed" -lt "$PROVISION_TIMEOUT" ]; do - if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then + if contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then break fi @@ -312,7 +496,7 @@ build_one() { if [ "$total_build_lines" -gt "$last_build_lines" ]; then echo "" sed -n "$((last_build_lines + 1)),${total_build_lines}p" "$provision_log" 2>/dev/null | while IFS= read -r msg; do - if [ "$msg" = "STACK_CLOUD_INIT_DONE" ]; then + if [ "$msg" = "STACK_CLOUD_INIT_DONE" ] || [ "$msg" = "STACK_SERVICES_READY" ]; then continue fi printf " [%3ds] %s\n" "$elapsed" "$msg" @@ -332,7 +516,7 @@ build_one() { done echo "" - if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then + if ! 
contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then if [ "$guest_failed" = true ]; then err "Guest provisioning reported failure for emulator (${arch})" elif [ "$guest_exited" = true ]; then @@ -358,17 +542,67 @@ build_one() { exit 1 fi - local shutdown_wait=0 - while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do - sleep 1 - shutdown_wait=$((shutdown_wait + 1)) - done + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst" + local savevm_raw="$tmp_dir/state.raw" + local savevm_tmp="$tmp_dir/state.zst" + + # Capture raw RAM/device state via QEMU's native file: migration; then + # compress on the host side. Avoids any reliance on QEMU spawning a shell + # that has zstd in PATH. + log "Capturing VM state via QMP (${arch})..." + if ! capture_vm_state "$monitor_sock" "$savevm_raw"; then + err "Failed to capture VM state for ${arch}" + if kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + sleep 1 + kill -9 "$pid" 2>/dev/null || true + fi + persist_provision_logs "$arch" "$serial_log" "$provision_log" + rm -rf "$tmp_dir" + exit 1 + fi - if kill -0 "$pid" 2>/dev/null; then - warn "Guest did not power off cleanly; forcing shutdown." - kill "$pid" 2>/dev/null || true - sleep 2 - kill -9 "$pid" 2>/dev/null || true + # QEMU exited cleanly via `quit`. Wait briefly to release the pid file. + local shutdown_wait=0 + while [ "$shutdown_wait" -lt 30 ] && kill -0 "$pid" 2>/dev/null; do + sleep 1 + shutdown_wait=$((shutdown_wait + 1)) + done + if kill -0 "$pid" 2>/dev/null; then + warn "QEMU did not exit after quit; forcing." + kill "$pid" 2>/dev/null || true + sleep 2 + kill -9 "$pid" 2>/dev/null || true + fi + + if [ ! -s "$savevm_raw" ]; then + err "VM state file missing or empty at $savevm_raw" + persist_provision_logs "$arch" "$serial_log" "$provision_log" + rm -rf "$tmp_dir" + exit 1 + fi + + log "Compressing VM state with zstd..." 
+ zstd -3 -T0 --rm -o "$savevm_tmp" "$savevm_raw" + + mv "$savevm_tmp" "$savevm_file" + local savevm_size + savevm_size="$(du -h "$savevm_file" | cut -f1)" + log "Saved VM state: $savevm_file (${savevm_size})" + else + local shutdown_wait=0 + while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do + sleep 1 + shutdown_wait=$((shutdown_wait + 1)) + done + + if kill -0 "$pid" 2>/dev/null; then + warn "Guest did not power off cleanly; forcing shutdown." + kill "$pid" 2>/dev/null || true + sleep 2 + kill -9 "$pid" 2>/dev/null || true + fi fi persist_provision_logs "$arch" "$serial_log" "$provision_log" diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index 38fe2b0646..8a968bf079 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -75,12 +75,24 @@ write_files: # ssk/sak: required by the emulator's own dashboard (StackServerApp # construction throws without them). Not used by user-app flows; the # /local-emulator/project route mints separate per-project credentials. + # + # Snapshot-build mode (STACK_EMULATOR_BUILD_SNAPSHOT=1 in /etc/stack-build.env): + # use deterministic placeholder hex strings instead of random values. The + # built image then contains these placeholders; at every `emulator start` + # resume the host generates fresh per-install secrets and + # /usr/local/bin/rotate-secrets (inside the stack container) swaps them in. umask 077 - for key in internal-pck internal-ssk internal-sak; do - if [ ! 
-s "/var/lib/stack-auth/$key" ]; then - openssl rand -hex 32 > "/var/lib/stack-auth/$key" - fi - done + if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + printf '%s' '00000000000000000000000000000000ffffffffffffffffffffffffffffffff' > /var/lib/stack-auth/internal-pck + printf '%s' '00000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' > /var/lib/stack-auth/internal-ssk + printf '%s' '00000000000000000000000000000000dddddddddddddddddddddddddddddddd' > /var/lib/stack-auth/internal-sak + else + for key in internal-pck internal-ssk internal-sak; do + if [ ! -s "/var/lib/stack-auth/$key" ]; then + openssl rand -hex 32 > "/var/lib/stack-auth/$key" + fi + done + fi INTERNAL_PCK="$(cat /var/lib/stack-auth/internal-pck)" INTERNAL_SSK="$(cat /var/lib/stack-auth/internal-ssk)" INTERNAL_SAK="$(cat /var/lib/stack-auth/internal-sak)" @@ -92,6 +104,15 @@ write_files: HOST_SERVICES_HOST=10.0.2.2 P="$STACK_EMULATOR_PORT_PREFIX" + # Snapshot-build mode: ship a deterministic placeholder CRON_SECRET so the + # baked VM contains a known-public value that rotate-secrets swaps out on + # every resume. Outside snapshot-build mode, leave CRON_SECRET unset so + # docker/local-emulator/entrypoint.sh generates a fresh random one. + EMULATOR_CRON_SECRET="" + if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + EMULATOR_CRON_SECRET="00000000000000000000000000000000cccccccccccccccccccccccccccccccc" + fi + { # Static vars from base config and runtime (e.g. 
API keys, feature flags) cat /mnt/stack-runtime/base.env @@ -99,6 +120,9 @@ write_files: printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$INTERNAL_PCK" printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$INTERNAL_SSK" printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$INTERNAL_SAK" + if [ -n "$EMULATOR_CRON_SECRET" ]; then + printf 'CRON_SECRET=%s\n' "$EMULATOR_CRON_SECRET" + fi # Computed vars — depend on port prefix or deps host # Host-side ports (for browser URLs — browser runs on host, not in VM) @@ -142,14 +166,20 @@ write_files: permissions: '0755' content: | #!/bin/bash - set -euo pipefail + set -uo pipefail mkdir -p /host - if ! mountpoint -q /host; then - if ! mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then - echo "Failed to mount host filesystem at /host" >&2 - exit 1 - fi + if mountpoint -q /host; then + exit 0 + fi + # In snapshot-build mode the host detaches virtfs (QEMU disallows + # migration while it's mounted), and at runtime we re-attach it. Tolerate + # both states: try to mount, fall through to an empty /host if no + # virtio-9p channel is available. + if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then + exit 0 fi + echo "host filesystem unavailable; continuing with empty /host" >&2 + exit 0 - path: /usr/local/bin/run-stack-container permissions: '0755' @@ -522,6 +552,74 @@ write_files: fstrim -av 2>/dev/null || true log "slim-docker-image done." + - path: /usr/local/bin/wait-for-stack-ready + permissions: '0755' + content: | + #!/bin/bash + # Poll the stack container's backend + dashboard on the guest's own + # localhost until both respond healthy. Used at snapshot-build time to + # gate "emit STACK_SERVICES_READY" on the app actually being warm. 
+ set -uo pipefail + + TIMEOUT="${STACK_READY_TIMEOUT:-600}" + BACKEND_PORT="${STACK_READY_BACKEND_PORT:-8102}" + DASHBOARD_PORT="${STACK_READY_DASHBOARD_PORT:-8101}" + + log() { /usr/local/bin/log-provision "wait-for-stack-ready: $*"; } + + start=$SECONDS + next_heartbeat=$((start + 30)) + log "waiting for backend:$BACKEND_PORT and dashboard:$DASHBOARD_PORT (timeout=${TIMEOUT}s)" + while true; do + backend_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${BACKEND_PORT}/health?db=1" 2>/dev/null || true) + dashboard_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${DASHBOARD_PORT}/handler/sign-in" 2>/dev/null || true) + if [ "$backend_code" = "200" ] && [ "$dashboard_code" = "200" ]; then + log "ready ($((SECONDS - start))s)" + exit 0 + fi + if [ "$SECONDS" -ge "$next_heartbeat" ]; then + log "still waiting (backend=$backend_code dashboard=$dashboard_code, $((SECONDS - start))s elapsed)" + next_heartbeat=$((SECONDS + 30)) + fi + if [ "$((SECONDS - start))" -ge "$TIMEOUT" ]; then + log "TIMEOUT after $((SECONDS - start))s (backend=$backend_code dashboard=$dashboard_code)" + docker ps -a 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: ps" || true + docker logs --tail 200 stack 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: stack" || true + systemctl status stack.service --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: svc" || true + journalctl -u stack.service --no-pager -n 100 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: jrnl" || true + docker image ls 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: img" || true + exit 1 + fi + sleep 2 + done + + - path: /usr/local/bin/trigger-fast-rotate + permissions: '0755' + content: | + #!/bin/bash + # Called via qemu-guest-agent on every snapshot resume. 
Reads fresh + # secrets from stdin (key=value lines, written by the host via QGA's + # guest-exec input-data) and execs rotate-secrets inside the stack + # container with those values exported. + set -euo pipefail + + tmp="$(mktemp /var/run/stack-fresh-XXXXXX.env)" + cat > "$tmp" + chmod 0600 "$tmp" + + # shellcheck disable=SC1090 + set -a + source "$tmp" + set +a + rm -f "$tmp" + + exec docker exec \ + -e STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \ + -e STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \ + -e STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \ + -e CRON_SECRET \ + stack /usr/local/bin/rotate-secrets + - path: /etc/systemd/system/stack.service content: | [Unit] @@ -591,6 +689,14 @@ write_files: systemctl disable --now ssh || true systemctl mask ssh || true + # qemu-guest-agent: used by the host to inject fresh secrets + trigger + # rotate-secrets after a snapshot resume. Must be running INSIDE the VM + # at snapshot capture time — the virtio-serial port's "open" state is + # part of the migrated device state. If QGA wasn't connected at capture, + # the resumed VM's port stays closed and the host can't reach it. + systemctl enable qemu-guest-agent || true + systemctl start qemu-guest-agent || true + log_provision "installing emulator containers" bash /usr/local/bin/install-emulator-containers @@ -603,6 +709,48 @@ write_files: log_provision "starting slim-docker-image" bash /usr/local/bin/slim-docker-image + # Snapshot-build mode: bring the stack container up, wait for full + # readiness, emit STACK_SERVICES_READY, then wait indefinitely for the + # host build script to capture VM state over QMP (stop + migrate + quit). + # The VM never shuts itself down in this path — the host tears it down + # once the savevm file has been written. 
+ if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + log_provision "snapshot-build mode: starting stack.service" + systemctl start stack.service || true + + log_provision "waiting for backend + dashboard to be ready" + if ! /usr/local/bin/wait-for-stack-ready; then + log_provision "ERROR: stack services did not become ready" + exit 1 + fi + + # Ensure qemu-guest-agent is running so its virtio-serial port stays + # "open" in the snapshot — the host needs that port at runtime to + # trigger rotate-secrets. + log_provision "ensuring qemu-guest-agent is up" + systemctl restart qemu-guest-agent || true + sleep 2 + if ! systemctl is-active --quiet qemu-guest-agent; then + log_provision "ERROR: qemu-guest-agent failed to start" + systemctl status qemu-guest-agent --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "qga" + exit 1 + fi + log_provision "qemu-guest-agent active" + + log_provision "services ready; signalling STACK_SERVICES_READY" + if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then + printf '%s\n' "STACK_SERVICES_READY" >> "$STACK_PROVISION_LOG_FILE" + fi + write_marker_to_consoles "STACK_SERVICES_READY" + sync || true + + # Clear the EXIT trap so the cleanup path doesn't mark this as failed + # when the host powers us off via QMP quit. + trap - EXIT + # Block forever; host will issue qmp quit after migrate completes. 
+ while true; do sleep 3600; done + fi + log_provision "build pipeline complete" if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then printf '%s\n' "STACK_CLOUD_INIT_DONE" >> "$STACK_PROVISION_LOG_FILE" diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index ba905ca36d..9181e527b3 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -12,6 +12,11 @@ VM_RAM="${EMULATOR_RAM:-4096}" VM_CPUS="${EMULATOR_CPUS:-4}" PORT_PREFIX="${PORT_PREFIX:-${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}}" READY_TIMEOUT="${EMULATOR_READY_TIMEOUT:-240}" +# Shorter timeout when resuming from a snapshot: services are already running, +# we only need to wait for rotate-secrets + Node restart (~3-10s). +SNAPSHOT_READY_TIMEOUT="${EMULATOR_SNAPSHOT_READY_TIMEOUT:-45}" +# Set to 1 to force a cold boot and ignore any shipped savevm file. +EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}" # Fixed host-side ports for the QEMU emulator (267xx range). # Only user-facing services are exposed; internal deps stay inside the VM. @@ -62,10 +67,18 @@ image_path() { echo "$IMAGE_DIR/stack-emulator-$ARCH.qcow2" } +savevm_path() { + echo "$IMAGE_DIR/stack-emulator-$ARCH.savevm.zst" +} + runtime_iso_path() { echo "$VM_DIR/runtime-config.iso" } +snapshot_available() { + [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] +} + # Returns a fast fingerprint (size:mtime) of the base QEMU image. # Used to detect whether the image has changed since the overlay was created. base_image_fingerprint() { @@ -77,6 +90,23 @@ base_image_fingerprint() { esac } +# Fingerprint used to detect stale overlays. Includes both the base qcow2 and +# the savevm file so the overlay is rebuilt whenever either input changes. The +# overlay disk must match the disk state the snapshot was taken against for +# -incoming resume to be consistent. 
+runtime_fingerprint() { + local base="$1" + local savevm="$2" + local base_fp savevm_fp + base_fp="$(base_image_fingerprint "$base")" + if [ -f "$savevm" ]; then + savevm_fp="$(base_image_fingerprint "$savevm")" + else + savevm_fp="no-savevm" + fi + printf '%s|%s\n' "$base_fp" "$savevm_fp" +} + prepare_runtime_config_iso() { local cfg_dir="$VM_DIR/runtime-config" local cfg_iso @@ -154,8 +184,9 @@ wait_for_condition() { } build_qemu_cmd() { - local base_img + local base_img savevm_file base_img="$(image_path)" + savevm_file="$(savevm_path)" if [ ! -f "$base_img" ]; then err "Missing QEMU image: $base_img" @@ -166,18 +197,35 @@ build_qemu_cmd() { mkdir -p "$VM_DIR" local fingerprint_file="$VM_DIR/base-image.fingerprint" local current_fp - current_fp="$(base_image_fingerprint "$base_img")" - if [ -f "$VM_DIR/disk.qcow2" ]; then - if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then - log "Reusing existing overlay disk (changes persist)" - else - warn "QEMU base image has changed — recreating overlay." + current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")" + + if snapshot_available; then + # The savevm RAM state was captured against the base image's exact disk + # state. An overlay with writes from a previous session diverges from + # that point, so -incoming would resume RAM against inconsistent disk. + # Always start from a fresh overlay in the snapshot path; per-session + # state is not preserved. Users who want persistence can opt out with + # EMULATOR_NO_SNAPSHOT=1. + if [ -f "$VM_DIR/disk.qcow2" ]; then rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" fi - fi - if [ ! 
-f "$VM_DIR/disk.qcow2" ]; then qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null - base_image_fingerprint "$base_img" > "$fingerprint_file" + printf '%s' "$current_fp" > "$fingerprint_file" + else + # If the overlay was created against a different base or savevm, it will + # diverge from the snapshot's disk state — force a rebuild. + if [ -f "$VM_DIR/disk.qcow2" ]; then + if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then + log "Reusing existing overlay disk (changes persist)" + else + warn "Base image or snapshot has changed — recreating overlay." + rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" + fi + fi + if [ ! -f "$VM_DIR/disk.qcow2" ]; then + qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null + printf '%s' "$current_fp" > "$fingerprint_file" + fi fi local qemu_bin machine cpu firmware_args=() @@ -213,28 +261,100 @@ build_qemu_cmd() { # are mutually exclusive. netdev+=",hostfwd=tcp:127.0.0.1:${PORT_PREFIX}14-:${PORT_PREFIX}14" - QEMU_CMD=( - "$qemu_bin" - -machine "$machine" - -accel "$ACCEL" - -cpu "$cpu" - "${firmware_args[@]}" - -boot order=c - -m "$VM_RAM" - -smp "$VM_CPUS" - -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" - -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" - -netdev "$netdev" - -device virtio-net-pci,netdev=net0 - -device virtio-balloon-pci - -virtfs "local,path=/,mount_tag=hostfs,security_model=none" - -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" - -mon "chardev=monitor,mode=control" - -serial "file:$VM_DIR/serial.log" - -display none - -daemonize - -pidfile "$VM_DIR/qemu.pid" - ) + # In snapshot-resume mode the QEMU command-line MUST match the device set + # used at snapshot capture time, otherwise migration replay fails (broken + # pipe / device tree mismatch). 
At capture time the build attaches: + # disk(if=virtio) + seed.iso + bundle.iso + runtime.iso (all if=virtio) + # netdev + virtio-net-pci + monitor + QGA virtio-serial + # SMP=4, RAM=4096 (pinned in build-image.sh snapshot mode) + # We mirror that exactly. The seed/bundle ISOs were used by cloud-init at + # build and are not needed at runtime, but their virtio-blk slots must + # exist so the migration replay matches device IDs. Runtime-only devices + # (virtfs, balloon) live at higher slots — extra at destination is fine. + local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" + if snapshot_available; then + log "Snapshot found at $savevm_file — fast-resume enabled." + snapshot_args+=(-incoming "exec:zstd -dc $savevm_file") + snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" + if [ "$snapshot_smp" != "$VM_CPUS" ]; then + log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." + fi + + # Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present + # at snapshot time. Their content doesn't matter (cloud-init has already + # run); only the virtio-blk slot count must match. + local seed_phantom="$VM_DIR/seed.phantom" + local bundle_phantom="$VM_DIR/bundle.phantom" + if [ ! -s "$seed_phantom" ]; then + dd if=/dev/zero of="$seed_phantom" bs=1M count=1 status=none + fi + if [ ! -s "$bundle_phantom" ]; then + dd if=/dev/zero of="$bundle_phantom" bs=1M count=1 status=none + fi + runtime_only_args+=( + -drive "file=$seed_phantom,format=raw,if=virtio,readonly=on" + -drive "file=$bundle_phantom,format=raw,if=virtio,readonly=on" + ) + else + # Cold-boot: include virtio-balloon and virtfs as before. 
+ runtime_only_args+=( + -device virtio-balloon-pci + -virtfs "local,path=/,mount_tag=hostfs,security_model=none" + ) + fi + + if snapshot_available; then + QEMU_CMD=( + "$qemu_bin" + -machine "$machine" + -accel "$ACCEL" + -cpu "$cpu" + "${firmware_args[@]}" + -boot order=c + -m "$VM_RAM" + -smp "$snapshot_smp" + -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" + "${runtime_only_args[@]}" + -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" + -netdev "$netdev" + -device virtio-net-pci,netdev=net0 + -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + "${snapshot_args[@]}" + -serial "file:$VM_DIR/serial.log" + -display none + -daemonize + -pidfile "$VM_DIR/qemu.pid" + ) + else + QEMU_CMD=( + "$qemu_bin" + -machine "$machine" + -accel "$ACCEL" + -cpu "$cpu" + "${firmware_args[@]}" + -boot order=c + -m "$VM_RAM" + -smp "$snapshot_smp" + -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" + -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" + -netdev "$netdev" + -device virtio-net-pci,netdev=net0 + "${runtime_only_args[@]}" + -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + -serial "file:$VM_DIR/serial.log" + -display none + -daemonize + -pidfile "$VM_DIR/qemu.pid" + ) + fi } @@ -274,6 +394,132 @@ start_vm() { "${QEMU_CMD[@]}" } +# Send one or more QMP commands over the monitor socket. Each line of stdin is +# a JSON object; capabilities are always negotiated first. 
Keep stdin open +# briefly after writing so socat doesn't close before QEMU responds — QMP +# typically replies in milliseconds so 0.3s is enough. +qmp_send() { + if [ ! -S "$VM_DIR/monitor.sock" ]; then + return 1 + fi + local payload + payload="$(cat)" + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$payload" + sleep 0.3 + } | socat -t5 - "UNIX-CONNECT:$VM_DIR/monitor.sock" 2>/dev/null +} + +# After -incoming, QEMU is in "inmigrate" until the entire migration stream has +# been received. Sending `cont` mid-migration would abort it (the host-side +# decompressor / pipe gets killed). Wait for the VM to reach a runnable state +# (paused / postmigrate / prelaunch / running) before continuing. +qmp_wait_for_paused_and_continue() { + local deadline=$((SECONDS + 120)) + while [ "$SECONDS" -lt "$deadline" ]; do + local out status + out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true) + status=$(printf '%s' "$out" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/') + case "$status" in + running) + return 0 + ;; + paused|postmigrate|prelaunch) + printf '%s\n' '{"execute":"cont"}' | qmp_send >/dev/null || true + return 0 + ;; + inmigrate|"") + # still loading migration data + ;; + *) + log "unexpected QMP status: $status" + ;; + esac + sleep 0.2 + done + return 1 +} + +# Generate fresh per-install secrets on the host. We pass them to the guest +# through QGA's guest-exec input-data field (base64-encoded), so no host file +# or virtfs mount is needed in the snapshot path. +generate_fresh_secrets_payload() { + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'CRON_SECRET=%s\n' "$(openssl rand -hex 32)" +} + +# Drive qemu-guest-agent via its virtserialport socket. 
QGA speaks the same +# JSON protocol as QMP but over a separate channel. We use guest-sync to make +# sure the agent is responsive, then guest-exec to fire trigger-fast-rotate. +qga_send() { + if [ ! -S "$VM_DIR/qga.sock" ]; then + return 1 + fi + # socat closes the connection on stdin EOF before QGA can reply, so keep + # stdin open for a short window after writing the request to give the + # agent time to respond. QGA replies in milliseconds; the only reason this + # isn't 0.1s is to absorb scheduling jitter on a busy host. + local payload + payload="$(cat)" + ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t10 - "UNIX-CONNECT:$VM_DIR/qga.sock" 2>/dev/null +} + +qga_wait_ready() { + local deadline=$((SECONDS + 30)) + while [ "$SECONDS" -lt "$deadline" ]; do + local resp + resp=$(printf '%s\n' '{"execute":"guest-sync","arguments":{"id":424242}}' | qga_send || true) + if printf '%s' "$resp" | grep -q '"return":[[:space:]]*424242'; then + return 0 + fi + sleep 0.2 + done + return 1 +} + +qga_trigger_fast_rotate() { + # guest-exec returns a pid; we then poll guest-exec-status until the + # process exits, and surface its exit code. Capture output so a failure + # message is available in serial.log. We pipe the fresh-secrets env file + # (as base64) to the script via input-data — keeps secrets off the + # filesystem and avoids needing virtfs. 
+ local secrets_b64 resp pid + secrets_b64=$(generate_fresh_secrets_payload | base64 | tr -d '\n') + local cmd + cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64") + resp=$(printf '%s\n' "$cmd" | qga_send || true) + pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + if [ -z "$pid" ]; then + err "guest-exec did not return a pid; response: $resp" + return 1 + fi + + # Rotation (sed + UPDATE + supervisorctl restart + node startup) fits well + # inside this window. + local deadline=$((SECONDS + 60)) + while [ "$SECONDS" -lt "$deadline" ]; do + local status_resp exited exitcode + status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true) + exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/') + if [ "$exited" = "true" ]; then + exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/') + if [ "${exitcode:-0}" = "0" ]; then + log "rotate-secrets completed." + return 0 + fi + err "rotate-secrets exited with code ${exitcode:-unknown}" + err "response: $status_resp" + return 1 + fi + sleep 0.2 + done + err "rotate-secrets did not complete within 60s" + return 1 +} + stop_vm() { if [ ! -f "$VM_DIR/qemu.pid" ]; then return 0 @@ -305,18 +551,58 @@ cmd_start() { info "Arch: $ARCH | Accel: $ACCEL" info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT" + local using_snapshot=0 + if snapshot_available; then + using_snapshot=1 + fi + start_vm info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs" - if ! 
wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then - tail_vm_logs - exit 1 - fi + if [ "$using_snapshot" = "1" ]; then + log "Resuming from snapshot..." + if ! qmp_wait_for_paused_and_continue; then + warn "Snapshot resume did not reach a runnable state — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi - if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then - tail_vm_logs - exit 1 + log "VM resumed; waiting for guest agent..." + if ! qga_wait_ready; then + warn "Guest agent did not respond — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + + log "Generating fresh secrets + triggering rotation..." + if ! qga_trigger_fast_rotate; then + warn "Failed to trigger rotate-secrets — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + + # Wait for the *new* backend (post-supervisor-restart) to actually be + # listening. all_ready may briefly return true against the OLD Node + # processes between when supervisor sends SIGTERM and when the children + # die; sleep a beat so we measure the real readiness. + sleep 1 + if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not recover after rotation — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi + else + if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then + tail_vm_logs + exit 1 + fi + + if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then + tail_vm_logs + exit 1 + fi fi log "All services are green." @@ -324,6 +610,17 @@ cmd_start() { info "Backend: http://localhost:${EMULATOR_BACKEND_PORT}" } +# If anything about the snapshot resume fails, stop the VM, wipe the overlay, +# and retry as a cold boot. Keeps the user unblocked even when the snapshot is +# broken (e.g. stale, incompatible host-arch/QEMU-version mismatch). 
+snapshot_fallback_to_cold_boot() { + warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." + stop_vm + rm -rf "$VM_DIR" + EMULATOR_NO_SNAPSHOT=1 + cmd_start +} + cmd_stop() { stop_vm log "QEMU emulator stopped." diff --git a/docker/local-emulator/rotate-secrets.sh b/docker/local-emulator/rotate-secrets.sh new file mode 100644 index 0000000000..d374446426 --- /dev/null +++ b/docker/local-emulator/rotate-secrets.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# Rotate baked-in placeholder secrets with fresh host-generated values. +# +# Called inside the stack container by the emulator snapshot-resume path. +# Host writes fresh secrets to /host/stack-runtime/fresh-secrets.env before +# invoking this script (via `docker exec stack /usr/local/bin/rotate-secrets`). +# +# Flow: +# 1. Read fresh secrets from host-supplied env file. +# 2. Validate they are 64-char hex (the build placeholders are too). +# 3. Write rotated-secrets.env that app-entrypoint and run-cron-jobs source +# on restart. +# 4. Targeted sed across built files: swap the placeholder PCK for the fresh +# one (this is the only secret baked into JS via sentinel replacement at +# build time — SSK/SAK/CRON_SECRET flow through process.env only). +# 5. UPDATE the internal ApiKeySet row in Postgres. +# 6. supervisorctl restart stack-app + cron-jobs so the new values take +# effect in the running Node processes. + +set -euo pipefail + +OUTPUT=/run/stack-auth/rotated-secrets.env +WORK_DIR="${STACK_RUNTIME_WORK_DIR:-/app}" + +PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff" + +log() { printf '[rotate-secrets] %s\n' "$*"; } + +# Fresh secrets arrive via env vars (passed by trigger-fast-rotate using +# `docker exec -e`). For backward compatibility, fall back to a file path if +# STACK_ROTATE_INPUT is set. 
+if [ -n "${STACK_ROTATE_INPUT:-}" ] && [ -f "$STACK_ROTATE_INPUT" ]; then + log "reading fresh secrets from $STACK_ROTATE_INPUT" + set -a + # shellcheck disable=SC1090 + source "$STACK_ROTATE_INPUT" + set +a +fi + +for var in STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \ + STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \ + STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \ + CRON_SECRET; do + val="${!var:-}" + if [ -z "$val" ]; then + log "ERROR: $var is missing from environment" + exit 1 + fi + if ! printf '%s' "$val" | grep -Eq '^[0-9a-fA-F]{64}$'; then + log "ERROR: $var is not a 64-char hex string" + exit 1 + fi +done + +mkdir -p "$(dirname "$OUTPUT")" +umask 077 +{ + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" + printf 'CRON_SECRET=%s\n' "$CRON_SECRET" + # Mirror these so process.env lookups in Node match env after restart. + printf 'NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" + printf 'STACK_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" + printf 'STACK_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" +} > "$OUTPUT" +chmod 0600 "$OUTPUT" +log "wrote $OUTPUT" + +# The PCK is baked into built JS via STACK_ENV_VAR_SENTINEL replacement at +# container start (see /app-entrypoint.sh). Swap the placeholder hex for the +# fresh value across the built tree. Only *.js files need patching; this +# runs in ~1s on the standalone Next.js bundles. 
+if [ "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" != "$PLACEHOLDER_PCK" ]; then + log "rewriting PCK placeholder in $WORK_DIR" + # grep -rl narrows the find to only files that contain the placeholder, so + # the follow-up sed doesn't walk the whole tree. + mapfile -t files < <(grep -rl --include='*.js' "$PLACEHOLDER_PCK" "$WORK_DIR/apps" 2>/dev/null || true) + if [ "${#files[@]}" -gt 0 ]; then + sed -i "s|${PLACEHOLDER_PCK}|${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}|g" "${files[@]}" + log "patched ${#files[@]} file(s)" + else + log "no files contained the placeholder (already rotated?)" + fi +fi + +# Update the internal ApiKeySet row so existing dashboard sessions keep +# working with the new keys. Values are already validated as hex above, so +# inlining is safe. +if [ -n "${STACK_DATABASE_CONNECTION_STRING:-}" ]; then + log "updating internal ApiKeySet" + psql "$STACK_DATABASE_CONNECTION_STRING" -v ON_ERROR_STOP=1 < a.name === diskAsset); + if (!diskMatch) { + throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); + } + const snapshotMatch = assets.assets.find((a) => a.name === snapshotAsset); + const token = gh(["auth", "token"]); + + await downloadAsset(diskMatch, imageDir, diskAsset, token, tag); + if (snapshotMatch) { + await downloadAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); + } else { + console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); + } +} + +async function downloadAsset( + match: { name: string, apiUrl: string, size: number }, + imageDir: string, + asset: string, + token: string, + tag: string, +): Promise { const dest = join(imageDir, asset); const tmpDest = `${dest}.download`; - console.log(`Pulling ${asset} from release ${tag}...`); try { - const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { - assets: { name: string, apiUrl: string, size: number }[], - }; - const match = assets.assets.find((a) => a.name === asset); - if (!match) { - throw new CliError(`Asset ${asset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`); - } - const token = gh(["auth", "token"]); await downloadWithProgress(match.apiUrl, { Authorization: `Bearer ${token}`, Accept: "application/octet-stream", @@ -185,7 +207,7 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); if (err instanceof CliError) throw err; - throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`); + throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? 
err.message : err}`); } renameSync(tmpDest, dest); console.log(`Downloaded: ${dest}`); @@ -291,7 +313,9 @@ export function registerEmulatorCommand(program: Command) { const imageDir = emulatorImageDir(); mkdirSync(imageDir, { recursive: true }); const dest = join(imageDir, `stack-emulator-${arch}.qcow2`); + const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); if (existsSync(dest)) unlinkSync(dest); + if (existsSync(snapshotDest)) unlinkSync(snapshotDest); console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`); try { execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" }); @@ -300,6 +324,15 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); + // Snapshot artifact is optional — older CI builds may not produce it. + try { + execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}-savevm`, "--dir", imageDir], { stdio: "pipe" }); + if (existsSync(snapshotDest)) { + console.log(`Downloaded: ${snapshotDest}`); + } + } catch { + console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); + } } else { await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } From 30dbdffc4a77b0e2ee2b327517a28b53b5824eb1 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:04:15 -0700 Subject: [PATCH 03/25] faster snapshot resume via mapped-ram + rotation opt-out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Snapshot resume drops from ~14s to ~5-7s with rotation, ~2.5s without. Build uses QEMU's mapped-ram + multifd migration capability so the RAM state is written at page-aligned offsets in a sparse file. 
Runtime decompresses the shipped .savevm.zst once to a local .raw cache and reloads via -incoming file: + migrate-incoming on subsequent starts, avoiding the per-start zstd decode. Adds EMULATOR_NO_ROTATION=1 for tests/CI that don't mind the placeholder secrets; saves the full ~3s rotation window. Misc runtime cleanups: tighter QMP/QGA poll intervals (1s → 0.2s), shorter socat keep-alive windows, 1s settle before the post-rotation health-check to avoid racing old Node processes, fallback path preserves the CLI-generated runtime-config.iso instead of blowing away VM_DIR. Build-time qmp_session keeps stdin open briefly after the caller's commands so migrate-set-capabilities is actually processed before socat closes — without this, mapped-ram was silently a no-op. CI workflow publishes .savevm.zst alongside the .qcow2 (optional asset; CLI falls back to cold boot when missing). Test + verify steps go through the CLI now that ISO generation is owned by packages/stack-cli. --- .github/workflows/qemu-emulator-build.yaml | 127 ++++-- docker/local-emulator/qemu/build-image.sh | 35 +- docker/local-emulator/qemu/run-emulator.sh | 165 +++++-- packages/stack-cli/package.json | 4 +- .../stack-cli/src/commands/emulator.test.ts | 166 ++++++++ packages/stack-cli/src/commands/emulator.ts | 337 ++++++++++++--- packages/stack-cli/src/lib/iso.test.ts | 259 +++++++++++ packages/stack-cli/src/lib/iso.ts | 403 ++++++++++++++++++ packages/stack-cli/vitest.config.ts | 19 + pnpm-lock.yaml | 241 +++-------- 10 files changed, 1444 insertions(+), 312 deletions(-) create mode 100644 packages/stack-cli/src/commands/emulator.test.ts create mode 100644 packages/stack-cli/src/lib/iso.test.ts create mode 100644 packages/stack-cli/src/lib/iso.ts create mode 100644 packages/stack-cli/vitest.config.ts diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 5df1497465..4bb738124d 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ 
b/.github/workflows/qemu-emulator-build.yaml @@ -55,10 +55,21 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + - uses: pnpm/action-setup@v4 + if: matrix.arch == 'amd64' + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + if: matrix.arch == 'amd64' + with: + node-version: 22 + cache: pnpm + - name: Install QEMU dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 + sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 zstd - name: Enable KVM access run: | @@ -82,41 +93,61 @@ jobs: - name: Generate emulator env run: node docker/local-emulator/generate-env-development.mjs - # arm64 runs under cross-arch TCG on an amd64 runner; the backend's - # V8 TurboFan JIT re-triggers the SIGTRAPs we dodge in migrations - # with --no-opt, and even if it didn't, boot is too slow under TCG - # to verify in any sane window. amd64 KVM already exercises the - # service stack; real arm64 hosts have KVM for end-users. - - name: Start emulator and verify + # amd64 runs under KVM on the runner so we can boot the newly-built + # image to verify it works end-to-end before publishing. arm64 runs + # under cross-arch TCG on an amd64 host, which can't reliably boot + # Next.js within any sane window — skipped. + - name: Build stack-cli (for emulator CLI) if: matrix.arch == 'amd64' run: | - chmod +x docker/local-emulator/qemu/run-emulator.sh - EMULATOR_ARCH=${{ matrix.arch }} \ - EMULATOR_READY_TIMEOUT=3200 \ - docker/local-emulator/qemu/run-emulator.sh start + pnpm install --frozen-lockfile --filter @stackframe/stack-cli... 
+ pnpm --filter @stackframe/stack-cli run build + + - name: Start emulator and verify + if: matrix.arch == 'amd64' + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_READY_TIMEOUT: 3200 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy if: matrix.arch == 'amd64' - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh status + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator status - name: Stop emulator if: always() && matrix.arch == 'amd64' - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh stop + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator stop - name: Package image run: | BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2" + SAVEVM="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.savevm.zst" cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2" + if [ -f "$SAVEVM" ]; then + cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" + ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" + else + echo "NOTE: no savevm snapshot was produced; fast-start will be unavailable for this arch." 
+ fi - name: Upload image artifact uses: actions/upload-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: stack-emulator-${{ matrix.arch }}.qcow2 + path: | + stack-emulator-${{ matrix.arch }}.qcow2 + stack-emulator-${{ matrix.arch }}.savevm.zst + if-no-files-found: warn retention-days: 30 compression-level: 0 @@ -137,28 +168,48 @@ jobs: - name: Install QEMU dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-utils genisoimage socat + sudo apt-get install -y qemu-system-x86 qemu-utils socat zstd + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install stack-cli deps + build + run: | + pnpm install --frozen-lockfile --filter @stackframe/stack-cli... + pnpm --filter @stackframe/stack-cli run build - name: Download built image uses: actions/download-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: docker/local-emulator/qemu/images/ + path: ${{ github.workspace }}/.stack-emulator-images/ - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs + - name: Place images into STACK_EMULATOR_HOME layout + run: | + mkdir -p "$HOME/.stack/emulator/images" + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" + if [ -f "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" ]; then + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" "$HOME/.stack/emulator/images/" + echo "Snapshot present — will test snapshot-resume path." + else + echo "No snapshot — will test cold-boot path." 
+ fi + ls -lh "$HOME/.stack/emulator/images/" - - name: Start emulator from artifact + - name: Start emulator via CLI run: | - chmod +x docker/local-emulator/qemu/run-emulator.sh docker/local-emulator/qemu/common.sh EMULATOR_ARCH=${{ matrix.arch }} \ EMULATOR_READY_TIMEOUT=600 \ - docker/local-emulator/qemu/run-emulator.sh start + node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh status + run: node packages/stack-cli/dist/index.js emulator status - name: Smoke test — backend health run: curl -sf http://localhost:26701/health?db=1 @@ -174,13 +225,11 @@ jobs: - name: Stop emulator if: always() - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh stop + run: node packages/stack-cli/dist/index.js emulator stop - name: Print serial log on failure if: failure() - run: tail -100 docker/local-emulator/qemu/run/vm/serial.log 2>/dev/null || true + run: tail -100 $HOME/.stack/emulator/run/vm/serial.log 2>/dev/null || true publish: name: Publish to GitHub Releases @@ -211,6 +260,11 @@ jobs: for f in artifacts/qemu-emulator-*/*.qcow2; do cp "$f" release/ done + # savevm.zst is optional — older branches may not produce it. Skip + # missing files rather than failing the publish. + for f in artifacts/qemu-emulator-*/*.savevm.zst; do + [ -f "$f" ] && cp "$f" release/ + done cat > release-notes.md </dev/null || echo 0)" + raw_ts="$(stat -f '%m' "$raw" 2>/dev/null || echo 0)" + ;; + *) + zst_ts="$(stat -c '%Y' "$zst" 2>/dev/null || echo 0)" + raw_ts="$(stat -c '%Y' "$raw" 2>/dev/null || echo 0)" + ;; + esac + + if [ -s "$raw" ] && [ "$raw_ts" -ge "$zst_ts" ]; then + return 0 + fi + + log "Decompressing snapshot cache (one-time; ~2-3GB sparse)..." + local tmp="${raw}.tmp" + rm -f "$tmp" + if ! 
zstd -dc "$zst" > "$tmp"; then + err "Failed to decompress $zst" + rm -f "$tmp" + return 1 + fi + mv "$tmp" "$raw" +} + # Returns a fast fingerprint (size:mtime) of the base QEMU image. # Used to detect whether the image has changed since the overlay was created. base_image_fingerprint() { @@ -107,22 +152,14 @@ runtime_fingerprint() { printf '%s|%s\n' "$base_fp" "$savevm_fp" } -prepare_runtime_config_iso() { - local cfg_dir="$VM_DIR/runtime-config" +ensure_runtime_config_iso() { local cfg_iso cfg_iso="$(runtime_iso_path)" - rm -rf "$cfg_dir" - mkdir -p "$cfg_dir" - { - printf "STACK_EMULATOR_PORT_PREFIX=%s\n" "$PORT_PREFIX" - printf "STACK_EMULATOR_DASHBOARD_HOST_PORT=%s\n" "$EMULATOR_DASHBOARD_PORT" - printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" - printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" - printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" - printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" - } > "$cfg_dir/runtime.env" - cp "$SCRIPT_DIR/../.env.development" "$cfg_dir/base.env" - make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" + if [ ! -s "$cfg_iso" ]; then + err "Runtime config ISO missing at $cfg_iso." + err "The CLI normally generates this; if you're invoking run-emulator.sh directly, run via 'stack emulator start' instead." + exit 1 + fi } service_is_up() { @@ -274,7 +311,10 @@ build_qemu_cmd() { local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" if snapshot_available; then log "Snapshot found at $savevm_file — fast-resume enabled." - snapshot_args+=(-incoming "exec:zstd -dc $savevm_file") + # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. + # We use that to set mapped-ram + multifd capabilities before loading, + # which enables parallel RAM restore (~2-3x faster than streamed decode). 
+ snapshot_args+=(-incoming defer) snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" if [ "$snapshot_smp" != "$VM_CPUS" ]; then log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." @@ -389,7 +429,7 @@ ensure_ports_free() { start_vm() { mkdir -p "$VM_DIR" : > "$VM_DIR/serial.log" - prepare_runtime_config_iso + ensure_runtime_config_iso build_qemu_cmd "${QEMU_CMD[@]}" } @@ -411,12 +451,34 @@ qmp_send() { } | socat -t5 - "UNIX-CONNECT:$VM_DIR/monitor.sock" 2>/dev/null } -# After -incoming, QEMU is in "inmigrate" until the entire migration stream has -# been received. Sending `cont` mid-migration would abort it (the host-side -# decompressor / pipe gets killed). Wait for the VM to reach a runnable state -# (paused / postmigrate / prelaunch / running) before continuing. -qmp_wait_for_paused_and_continue() { - local deadline=$((SECONDS + 120)) +# After -incoming defer, QEMU waits for a migrate-incoming command. This sets +# up mapped-ram + multifd capabilities and kicks off the RAM load from the +# decompressed cache file. Returns once the VM is running. +qmp_incoming_and_cont() { + local raw_file="$1" + + # Set caps + parameters before migrate-incoming, same as source. + local setup_resp + setup_resp=$( { + printf '%s\n' '{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' + printf '%s\n' '{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' + } | qmp_send) + if printf '%s' "$setup_resp" | grep -q '"error"'; then + err "QMP caps setup failed: $setup_resp" + return 1 + fi + + # Kick off the incoming migration from the mapped-ram file. 
+ local inc_cmd inc_resp + inc_cmd=$(printf '{"execute":"migrate-incoming","arguments":{"uri":"file:%s"}}' "$raw_file") + inc_resp=$(printf '%s\n' "$inc_cmd" | qmp_send) + if printf '%s' "$inc_resp" | grep -q '"error"'; then + err "QMP migrate-incoming failed: $inc_resp" + return 1 + fi + + # Poll until status reaches a runnable state, then cont. + local deadline=$((SECONDS + 60)) while [ "$SECONDS" -lt "$deadline" ]; do local out status out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true) @@ -430,7 +492,6 @@ qmp_wait_for_paused_and_continue() { return 0 ;; inmigrate|"") - # still loading migration data ;; *) log "unexpected QMP status: $status" @@ -539,7 +600,6 @@ stop_vm() { fi fi rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log" - rm -rf "$VM_DIR/runtime-config" rm -f "$VM_DIR/runtime-config.iso" } @@ -553,6 +613,11 @@ cmd_start() { local using_snapshot=0 if snapshot_available; then + if ! ensure_savevm_raw; then + warn "Snapshot decompression failed — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi using_snapshot=1 fi @@ -561,8 +626,8 @@ cmd_start() { info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs" if [ "$using_snapshot" = "1" ]; then - log "Resuming from snapshot..." - if ! qmp_wait_for_paused_and_continue; then + log "Resuming from snapshot (mapped-ram + multifd)..." + if ! qmp_incoming_and_cont "$(savevm_raw_path)"; then warn "Snapshot resume did not reach a runnable state — falling back to cold boot." snapshot_fallback_to_cold_boot return @@ -575,23 +640,33 @@ cmd_start() { return fi - log "Generating fresh secrets + triggering rotation..." - if ! qga_trigger_fast_rotate; then - warn "Failed to trigger rotate-secrets — falling back to cold boot." - snapshot_fallback_to_cold_boot - return - fi + if [ "$EMULATOR_NO_ROTATION" = "1" ]; then + warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." + if ! 
wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not respond after resume — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi + else + log "Generating fresh secrets + triggering rotation..." + if ! qga_trigger_fast_rotate; then + warn "Failed to trigger rotate-secrets — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi - # Wait for the *new* backend (post-supervisor-restart) to actually be - # listening. all_ready may briefly return true against the OLD Node - # processes between when supervisor sends SIGTERM and when the children - # die; sleep a beat so we measure the real readiness. - sleep 1 - if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then - warn "Services did not recover after rotation — falling back to cold boot." - tail_vm_logs - snapshot_fallback_to_cold_boot - return + # Wait for the *new* backend (post-supervisor-restart) to actually be + # listening. all_ready may briefly return true against the OLD Node + # processes between when supervisor sends SIGTERM and when the children + # die; sleep a beat so we measure the real readiness. + sleep 1 + if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not recover after rotation — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi fi else if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then @@ -616,7 +691,11 @@ cmd_start() { snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm - rm -rf "$VM_DIR" + # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one, + # but keep the CLI-generated runtime-config.iso (we can't regenerate it + # from shell — the CLI owns that). 
+ rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ + "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 cmd_start } diff --git a/packages/stack-cli/package.json b/packages/stack-cli/package.json index 3f574e2413..57a8bfaae7 100644 --- a/packages/stack-cli/package.json +++ b/packages/stack-cli/package.json @@ -13,7 +13,8 @@ "build": "tsdown && node scripts/copy-emulator-assets.mjs", "dev": "tsdown --watch", "lint": "eslint --ext .tsx,.ts .", - "typecheck": "tsc --noEmit" + "typecheck": "tsc --noEmit", + "test": "vitest run" }, "files": [ "README.md", @@ -31,6 +32,7 @@ "@stackframe/js": "workspace:*", "@stackframe/stack-shared": "workspace:*", "commander": "^13.1.0", + "extract-zip": "^2.0.1", "jiti": "^2.4.2" }, "devDependencies": { diff --git a/packages/stack-cli/src/commands/emulator.test.ts b/packages/stack-cli/src/commands/emulator.test.ts new file mode 100644 index 0000000000..9cbe9caa16 --- /dev/null +++ b/packages/stack-cli/src/commands/emulator.test.ts @@ -0,0 +1,166 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + envPort, + formatBytes, + formatDuration, + platformInstallHint, + renderProgressLine, + resolveArch, +} from "./emulator.js"; + +describe("formatBytes", () => { + it("renders B / KB / MB / GB across unit boundaries", () => { + expect(formatBytes(0)).toBe("0 B"); + expect(formatBytes(1)).toBe("1 B"); + expect(formatBytes(1023)).toBe("1023 B"); + expect(formatBytes(1024)).toBe("1.0 KB"); + expect(formatBytes(1536)).toBe("1.5 KB"); + expect(formatBytes(1024 * 1024)).toBe("1.0 MB"); + expect(formatBytes(1024 * 1024 * 1024)).toBe("1.0 GB"); + expect(formatBytes(1024 * 1024 * 1024 * 1024)).toBe("1.0 TB"); + }); + + it("switches precision at v>=10 within a unit", () => { + expect(formatBytes(1024 * 10)).toBe("10 KB"); + expect(formatBytes(1024 * 9.5)).toBe("9.5 KB"); + }); + + it("returns '?' 
for non-finite and negative values", () => { + expect(formatBytes(NaN)).toBe("?"); + expect(formatBytes(Infinity)).toBe("?"); + expect(formatBytes(-1)).toBe("?"); + }); + + it("caps at TB for very large values", () => { + // Even if we exceed TB, we don't walk off the end of the units array. + const huge = 1024 ** 6; // exabyte-scale + expect(formatBytes(huge)).toMatch(/ TB$/); + }); +}); + +describe("formatDuration", () => { + it("uses s/m/h units at the right boundaries", () => { + expect(formatDuration(0)).toBe("0s"); + expect(formatDuration(59)).toBe("59s"); + expect(formatDuration(60)).toBe("1m00s"); + expect(formatDuration(61)).toBe("1m01s"); + expect(formatDuration(3599)).toBe("59m59s"); + expect(formatDuration(3600)).toBe("1h00m"); + expect(formatDuration(3660)).toBe("1h01m"); + }); + + it("rounds seconds to integers", () => { + expect(formatDuration(59.4)).toBe("59s"); + expect(formatDuration(59.9)).toBe("1m00s"); + }); + + it("returns '?' for non-finite and negative values", () => { + expect(formatDuration(NaN)).toBe("?"); + expect(formatDuration(Infinity)).toBe("?"); + expect(formatDuration(-1)).toBe("?"); + }); +}); + +describe("renderProgressLine", () => { + it("renders a known-size progress bar with percent, size, speed, and ETA", () => { + const line = renderProgressLine(1024, 2048, 512); + expect(line).toContain("50.0%"); + expect(line).toContain("/"); + expect(line).toContain("/s"); + expect(line).toContain("eta"); + }); + + it("hides the percent / ETA fields when total size is unknown (total=0)", () => { + const line = renderProgressLine(1024, 0, 512); + expect(line).not.toContain("%"); + expect(line).not.toContain("eta"); + expect(line).toContain("/s"); + }); + + it("clamps percent at 100 if downloaded overshoots total (rounding)", () => { + const line = renderProgressLine(2050, 2048, 100); + expect(line).toContain("100.0%"); + }); + + it("handles bytesPerSec = 0 by suppressing ETA", () => { + const line = renderProgressLine(512, 2048, 0); + 
expect(line).not.toContain("eta"); + }); +}); + +describe("envPort", () => { + const SAVED = process.env.__TEST_PORT; + beforeEach(() => { + delete process.env.__TEST_PORT; + }); + afterEach(() => { + if (SAVED === undefined) delete process.env.__TEST_PORT; + else process.env.__TEST_PORT = SAVED; + }); + + it("returns the fallback when the env var is not set", () => { + expect(envPort("__TEST_PORT", 1234)).toBe(1234); + }); + + it("parses a valid integer value", () => { + process.env.__TEST_PORT = "9876"; + expect(envPort("__TEST_PORT", 1234)).toBe(9876); + }); + + it("rejects zero and negative values", () => { + process.env.__TEST_PORT = "0"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + process.env.__TEST_PORT = "-5"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + }); + + it("rejects non-integer and non-numeric values", () => { + process.env.__TEST_PORT = "3.14"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + process.env.__TEST_PORT = "not-a-port"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + }); + + it("treats empty string as not set (returns fallback)", () => { + // Regression target: earlier versions sometimes parsed "" as 0 and threw. + process.env.__TEST_PORT = ""; + expect(envPort("__TEST_PORT", 1234)).toBe(1234); + }); +}); + +describe("resolveArch", () => { + it("accepts explicit arm64 / amd64", () => { + expect(resolveArch("arm64")).toBe("arm64"); + expect(resolveArch("amd64")).toBe("amd64"); + }); + + it("throws on unsupported explicit arch", () => { + expect(() => resolveArch("mips")).toThrow(/Invalid architecture/); + expect(() => resolveArch("x86")).toThrow(/Invalid architecture/); + }); + + it("maps the current process arch when raw is undefined", () => { + const expected = process.arch === "arm64" ? "arm64" : process.arch === "x64" ? 
"amd64" : null; + if (expected === null) { + expect(() => resolveArch()).toThrow(/Invalid architecture/); + } else { + expect(resolveArch()).toBe(expected); + } + }); +}); + +describe("platformInstallHint", () => { + it("uses brew on darwin and apt on linux", () => { + const spy = vi.spyOn(process, "platform", "get"); + try { + spy.mockReturnValue("darwin"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("brew install foo-mac"); + spy.mockReturnValue("linux"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("apt install foo-linux"); + spy.mockReturnValue("win32"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("install foo-mac"); + } finally { + spy.mockRestore(); + } + }); +}); diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 7753cc86a5..5967a11038 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,5 +1,6 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; +import extract from "extract-zip"; import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; @@ -7,19 +8,36 @@ import { Readable } from "stream"; import { pipeline } from "stream/promises"; import { fileURLToPath } from "url"; import { CliError } from "../lib/errors.js"; +import { writeIso } from "../lib/iso.js"; const DEFAULT_EMULATOR_BACKEND_PORT = 26701; - -function emulatorBackendPort(): number { - const raw = process.env.EMULATOR_BACKEND_PORT; - if (!raw) return DEFAULT_EMULATOR_BACKEND_PORT; +const DEFAULT_EMULATOR_DASHBOARD_PORT = 26700; +const DEFAULT_EMULATOR_MINIO_PORT = 26702; +const DEFAULT_EMULATOR_INBUCKET_PORT = 26703; +const DEFAULT_PORT_PREFIX = "81"; +const GITHUB_API = "https://api.github.com"; +const DEFAULT_REPO = "stack-auth/stack-auth"; +const 
AARCH64_FIRMWARE_PATHS = [ + "/opt/homebrew/share/qemu/edk2-aarch64-code.fd", + "/usr/share/qemu/edk2-aarch64-code.fd", + "/usr/share/AAVMF/AAVMF_CODE.fd", + "/usr/share/qemu-efi-aarch64/QEMU_EFI.fd", +]; + +export function envPort(name: string, fallback: number): number { + const raw = process.env[name]; + if (!raw) return fallback; const parsed = Number(raw); if (!Number.isInteger(parsed) || parsed <= 0) { - throw new CliError(`Invalid EMULATOR_BACKEND_PORT: ${raw}`); + throw new CliError(`Invalid ${name}: ${raw}`); } return parsed; } +function emulatorBackendPort(): number { + return envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT); +} + function emulatorHome(): string { return process.env.STACK_EMULATOR_HOME ?? join(homedir(), ".stack", "emulator"); } @@ -84,15 +102,40 @@ async function fetchEmulatorCredentials(pck: string, backendPort: number, config }; } -function gh(args: string[]): string { +// Resolve a GitHub auth token. We try GITHUB_TOKEN first so users can pin a +// PAT, then fall back to `gh auth token` if the gh CLI is installed and +// signed in. If neither works we return undefined — public release downloads +// still work (anonymous, lower rate limit) but artifact downloads fail with a +// clear error at the call site. +function githubToken(): string | undefined { + if (process.env.GITHUB_TOKEN) return process.env.GITHUB_TOKEN; try { - return execFileSync("gh", args, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim(); - } catch (err: unknown) { - if (err instanceof Error && "stderr" in err && typeof err.stderr === "string") { - throw new CliError(`GitHub CLI error: ${err.stderr}`); - } - throw new CliError("GitHub CLI (gh) is required. 
Install: https://cli.github.com/"); + const out = execFileSync("gh", ["auth", "token"], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + return out || undefined; + } catch { + return undefined; + } +} + +async function ghApi(path: string): Promise { + const token = githubToken(); + const headers: Record = { + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }; + if (token) headers.Authorization = `Bearer ${token}`; + const res = await fetch(`${GITHUB_API}${path}`, { headers }); + if (!res.ok) { + const body = await res.text().catch(() => ""); + const hint = res.status === 401 || res.status === 403 + ? " (set GITHUB_TOKEN or run `gh auth login` for higher rate limits / private access)" + : ""; + throw new CliError(`GitHub API ${res.status} ${res.statusText} for ${path}${hint}${body ? `: ${body.slice(0, 300)}` : ""}`); } + return await (res.json() as Promise); } function emulatorScriptsDir(): string { @@ -104,6 +147,16 @@ function emulatorScriptsDir(): string { throw new CliError("Emulator scripts not found in CLI bundle."); } +function baseEnvPath(): string { + // Lives one directory up from the scripts dir in both bundled and repo + // layouts (dist/.env.development vs docker/local-emulator/.env.development). + const path = resolve(emulatorScriptsDir(), "..", ".env.development"); + if (!existsSync(path)) { + throw new CliError(`Emulator base.env not found at ${path}`); + } + return path; +} + function emulatorSpawnEnv(extra?: Record): NodeJS.ProcessEnv { return { ...process.env, @@ -113,6 +166,33 @@ function emulatorSpawnEnv(extra?: Record): NodeJS.ProcessEnv { }; } +// Generate the runtime config ISO that the VM mounts via STACKCFG. Replaces +// the hdiutil/mkisofs/genisoimage host dep — see ../lib/iso.ts. +function prepareRuntimeConfigIso(): void { + const vmDir = join(emulatorRunDir(), "vm"); + mkdirSync(vmDir, { recursive: true }); + const portPrefix = process.env.PORT_PREFIX ?? 
process.env.NEXT_PUBLIC_STACK_PORT_PREFIX ?? DEFAULT_PORT_PREFIX; + const dashboardPort = envPort("EMULATOR_DASHBOARD_PORT", DEFAULT_EMULATOR_DASHBOARD_PORT); + const backendPort = envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT); + const minioPort = envPort("EMULATOR_MINIO_PORT", DEFAULT_EMULATOR_MINIO_PORT); + const inbucketPort = envPort("EMULATOR_INBUCKET_PORT", DEFAULT_EMULATOR_INBUCKET_PORT); + + const runtimeEnv = [ + `STACK_EMULATOR_PORT_PREFIX=${portPrefix}`, + `STACK_EMULATOR_DASHBOARD_HOST_PORT=${dashboardPort}`, + `STACK_EMULATOR_BACKEND_HOST_PORT=${backendPort}`, + `STACK_EMULATOR_MINIO_HOST_PORT=${minioPort}`, + `STACK_EMULATOR_INBUCKET_HOST_PORT=${inbucketPort}`, + `STACK_EMULATOR_VM_DIR_HOST=${vmDir}`, + "", + ].join("\n"); + const baseEnv = readFileSync(baseEnvPath()); + writeIso(join(vmDir, "runtime-config.iso"), "STACKCFG", [ + { name: "runtime.env", data: Buffer.from(runtimeEnv, "utf-8") }, + { name: "base.env", data: baseEnv }, + ]); +} + function runEmulator(action: string, env?: Record): Promise { const scriptsDir = emulatorScriptsDir(); mkdirSync(emulatorRunDir(), { recursive: true }); @@ -149,17 +229,21 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { console.log("No emulator image found. Pulling latest..."); await pullRelease(arch); } + prepareRuntimeConfigIso(); await runEmulator("start", { EMULATOR_ARCH: arch }); } -function resolveArch(raw?: string): "arm64" | "amd64" { +export function resolveArch(raw?: string): "arm64" | "amd64" { const arch = raw ?? (process.arch === "arm64" ? "arm64" : process.arch === "x64" ? "amd64" : null); if (arch === "arm64" || arch === "amd64") return arch; throw new CliError(`Invalid architecture: ${raw ?? process.arch}. 
Expected arm64 or amd64.`); } +type ReleaseAsset = { name: string, url: string, size: number }; +type ReleaseResponse = { assets: ReleaseAsset[] }; + async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) { - const repo = opts.repo ?? "stack-auth/stack-auth"; + const repo = opts.repo ?? DEFAULT_REPO; const branch = opts.branch ?? "dev"; const tag = opts.tag ?? `emulator-${branch}-latest`; const imageDir = emulatorImageDir(); @@ -171,39 +255,36 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc // back to a cold boot. const snapshotAsset = `stack-emulator-${arch}.savevm.zst`; - const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { - assets: { name: string, apiUrl: string, size: number }[], - }; - const diskMatch = assets.assets.find((a) => a.name === diskAsset); + const release = await ghApi(`/repos/${repo}/releases/tags/${tag}`); + const diskMatch = release.assets.find((a) => a.name === diskAsset); if (!diskMatch) { throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); } - const snapshotMatch = assets.assets.find((a) => a.name === snapshotAsset); - const token = gh(["auth", "token"]); + const snapshotMatch = release.assets.find((a) => a.name === snapshotAsset); + const token = githubToken(); - await downloadAsset(diskMatch, imageDir, diskAsset, token, tag); + await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag); if (snapshotMatch) { - await downloadAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); + await downloadReleaseAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); } else { console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); } } -async function downloadAsset( - match: { name: string, apiUrl: string, size: number }, +async function downloadReleaseAsset( + match: ReleaseAsset, imageDir: string, asset: string, - token: string, + token: string | undefined, tag: string, ): Promise { const dest = join(imageDir, asset); const tmpDest = `${dest}.download`; console.log(`Pulling ${asset} from release ${tag}...`); + const headers: Record = { Accept: "application/octet-stream" }; + if (token) headers.Authorization = `Bearer ${token}`; try { - await downloadWithProgress(match.apiUrl, { - Authorization: `Bearer ${token}`, - Accept: "application/octet-stream", - }, tmpDest, match.size); + await downloadWithProgress(match.url, headers, tmpDest, match.size); } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); if (err instanceof CliError) throw err; @@ -248,7 +329,7 @@ async function downloadWithProgress(url: string, headers: Record if (isTty) process.stderr.write("\n"); } -function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { +export function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { const barWidth = 30; const pct = total > 0 ? 
Math.min(100, (downloaded / total) * 100) : 0; const filled = total > 0 ? Math.round((downloaded / total) * barWidth) : 0; @@ -260,7 +341,7 @@ function renderProgressLine(downloaded: number, total: number, bytesPerSec: numb return ` [${bar}] ${pctStr} ${sizeStr} ${speedStr}${etaStr}`; } -function formatBytes(bytes: number): string { +export function formatBytes(bytes: number): string { if (!Number.isFinite(bytes) || bytes < 0) return "?"; const units = ["B", "KB", "MB", "GB", "TB"]; let v = bytes; @@ -272,7 +353,7 @@ function formatBytes(bytes: number): string { return `${v.toFixed(v < 10 && i > 0 ? 1 : 0)} ${units[i]}`; } -function formatDuration(seconds: number): string { +export function formatDuration(seconds: number): string { if (!Number.isFinite(seconds) || seconds < 0) return "?"; const s = Math.round(seconds); if (s < 60) return `${s}s`; @@ -284,6 +365,116 @@ function formatDuration(seconds: number): string { return `${h}h${rm.toString().padStart(2, "0")}m`; } +// --- Dependency preflight --------------------------------------------------- + +type BinarySpec = { name: string, install: string }; + +function commandExists(bin: string): boolean { + try { + execFileSync(process.platform === "win32" ? 
"where" : "which", [bin], { stdio: "pipe" }); + return true; + } catch { + return false; + } +} + +export function platformInstallHint(linuxPkg: string, macPkg: string): string { + switch (process.platform) { + case "darwin": { + return `brew install ${macPkg}`; + } + case "linux": { + return `apt install ${linuxPkg} (or your distro's equivalent)`; + } + default: { + return `install ${macPkg}`; + } + } +} + +function bin(name: string, linuxPkg: string, macPkg: string): BinarySpec { + return { name, install: platformInstallHint(linuxPkg, macPkg) }; +} + +function requireBinaries(commandName: string, bins: BinarySpec[]): void { + const missing = bins.filter((b) => !commandExists(b.name)); + if (missing.length === 0) return; + const lines = missing.map((b) => ` - ${b.name} → ${b.install}`); + throw new CliError( + `\`stack emulator ${commandName}\` requires the following missing binaries:\n${lines.join("\n")}`, + ); +} + +function warnIfMissing(commandName: string, bins: BinarySpec[]): void { + const missing = bins.filter((b) => !commandExists(b.name)); + if (missing.length === 0) return; + for (const b of missing) { + console.warn(`[stack emulator ${commandName}] optional dep '${b.name}' missing — feature degraded. 
Install: ${b.install}`); + } +} + +function aarch64FirmwareAvailable(): boolean { + return AARCH64_FIRMWARE_PATHS.some((p) => existsSync(p)); +} + +function commonVmBins(): BinarySpec[] { + return [ + bin("qemu-img", "qemu-utils", "qemu"), + bin("socat", "socat", "socat"), + bin("curl", "curl", "curl"), + bin("nc", "ncat", "netcat"), + bin("lsof", "lsof", "lsof"), + bin("openssl", "openssl", "openssl"), + ]; +} + +function archSpecificQemuBin(arch: "arm64" | "amd64"): BinarySpec { + if (arch === "arm64") { + return bin("qemu-system-aarch64", "qemu-system-arm", "qemu"); + } + return bin("qemu-system-x86_64", "qemu-system-x86", "qemu"); +} + +function preflightForVmStart(commandName: string, arch: "arm64" | "amd64"): void { + requireBinaries(commandName, [archSpecificQemuBin(arch), ...commonVmBins()]); + warnIfMissing(commandName, [bin("zstd", "zstd", "zstd")]); + if (arch === "arm64" && !aarch64FirmwareAvailable()) { + throw new CliError( + `aarch64 UEFI firmware not found. Looked in:\n${AARCH64_FIRMWARE_PATHS.map((p) => ` - ${p}`).join("\n")}\n` + + `Install: ${platformInstallHint("qemu-efi-aarch64", "qemu")}`, + ); + } +} + +// --- Workflow run / artifact downloads (replaces `gh run download`) --------- + +type WorkflowRunsResponse = { workflow_runs: { id: number }[] }; +type ArtifactsResponse = { artifacts: { id: number, name: string, size_in_bytes: number }[] }; +type PullResponse = { head: { ref: string } }; + +async function downloadArtifactByName(repo: string, runId: string, name: string, destDir: string): Promise { + const token = githubToken(); + if (!token) { + throw new CliError( + "Downloading workflow run artifacts requires authentication. 
Set GITHUB_TOKEN or run `gh auth login`.", + ); + } + const list = await ghApi(`/repos/${repo}/actions/runs/${runId}/artifacts?per_page=100`); + const match = list.artifacts.find((a) => a.name === name); + if (!match) return false; + const zipPath = join(destDir, `${name}.zip`); + console.log(`Downloading artifact '${name}' from run ${runId}...`); + await downloadWithProgress( + `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`, + { Accept: "application/octet-stream", Authorization: `Bearer ${token}` }, + zipPath, + match.size_in_bytes, + ); + await extract(zipPath, { dir: destDir }); + unlinkSync(zipPath); + return true; +} + export function registerEmulatorCommand(program: Command) { const emulator = program.command("emulator").description("Manage the QEMU local emulator"); @@ -298,16 +489,21 @@ export function registerEmulatorCommand(program: Command) { .option("--run ", "Pull from a specific workflow run's artifacts") .action(async (opts) => { const arch = resolveArch(opts.arch); - const repo = opts.repo ?? "stack-auth/stack-auth"; + const repo = opts.repo ?? 
DEFAULT_REPO; if (opts.run || opts.pr) { let runId = opts.run as string | undefined; if (!runId) { console.log(`Finding latest successful build for PR #${opts.pr}...`); - const { headRefName } = JSON.parse(gh(["pr", "view", opts.pr, "--repo", repo, "--json", "headRefName"])); - const runs = JSON.parse(gh(["run", "list", "--repo", repo, "--workflow", "qemu-emulator-build.yaml", "--branch", headRefName, "--status", "success", "--limit", "1", "--json", "databaseId"])); - if (runs.length === 0) throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`); - runId = String(runs[0].databaseId); + const pr = await ghApi(`/repos/${repo}/pulls/${opts.pr}`); + const headRefName = pr.head.ref; + const runs = await ghApi( + `/repos/${repo}/actions/workflows/qemu-emulator-build.yaml/runs?branch=${encodeURIComponent(headRefName)}&status=success&per_page=1`, + ); + if (runs.workflow_runs.length === 0) { + throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`); + } + runId = String(runs.workflow_runs[0].id); } const imageDir = emulatorImageDir(); @@ -316,21 +512,22 @@ export function registerEmulatorCommand(program: Command) { const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); if (existsSync(dest)) unlinkSync(dest); if (existsSync(snapshotDest)) unlinkSync(snapshotDest); - console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`); - try { - execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" }); - } catch (err) { - throw new CliError(`Failed to download artifact from run ${runId}: ${err instanceof Error ? 
err.message : err}`); + const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir); + if (!downloaded) { + throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`); } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); // Snapshot artifact is optional — older CI builds may not produce it. + let snapshotDownloaded = false; try { - execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}-savevm`, "--dir", imageDir], { stdio: "pipe" }); - if (existsSync(snapshotDest)) { - console.log(`Downloaded: ${snapshotDest}`); - } - } catch { + snapshotDownloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}-savevm`, imageDir); + } catch (err) { + console.log(`Snapshot artifact unavailable for run ${runId}: ${err instanceof Error ? err.message : err}`); + } + if (snapshotDownloaded && existsSync(snapshotDest)) { + console.log(`Downloaded: ${snapshotDest}`); + } else if (!snapshotDownloaded) { console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); } } else { @@ -345,6 +542,7 @@ export function registerEmulatorCommand(program: Command) { .option("--config-file ", "Path to a config file; when set, credentials for this project are printed to stdout as JSON") .action(async (opts: { arch?: string, configFile?: string }) => { const arch = resolveArch(opts.arch); + preflightForVmStart("start", arch); let resolvedConfigFile: string | undefined; if (opts.configFile) { @@ -375,6 +573,7 @@ export function registerEmulatorCommand(program: Command) { .option("--config-file ", "Path to a config file; fetches credentials and injects STACK_PROJECT_ID / STACK_PUBLISHABLE_CLIENT_KEY / STACK_SECRET_SERVER_KEY into the child") .action(async (cmd: string, opts: { arch?: string, configFile?: string }) => { const arch = resolveArch(opts.arch); + 
preflightForVmStart("run", arch); let resolvedConfigFile: string | undefined; if (opts.configFile) { @@ -429,18 +628,50 @@ export function registerEmulatorCommand(program: Command) { }); }); - emulator.command("stop").description("Stop the emulator (data preserved; use 'reset' to clear)").action(() => runEmulator("stop")); - emulator.command("reset").description("Reset emulator state for a fresh boot").action(() => runEmulator("reset")); - emulator.command("status").description("Show emulator and service health").action(() => runEmulator("status")); + emulator + .command("stop") + .description("Stop the emulator (data preserved; use 'reset' to clear)") + .action(() => { + requireBinaries("stop", [bin("socat", "socat", "socat")]); + return runEmulator("stop"); + }); + + emulator + .command("reset") + .description("Reset emulator state for a fresh boot") + .action(() => { + requireBinaries("reset", [bin("socat", "socat", "socat")]); + return runEmulator("reset"); + }); + + emulator + .command("status") + .description("Show emulator and service health") + .action(() => { + requireBinaries("status", [ + bin("curl", "curl", "curl"), + bin("nc", "ncat", "netcat"), + ]); + return runEmulator("status"); + }); emulator .command("list-releases") .description("List available emulator releases") .option("--repo ", "GitHub repository (default: stack-auth/stack-auth)") - .action((opts) => { - const repo = opts.repo ?? "stack-auth/stack-auth"; + .action(async (opts) => { + const repo = opts.repo ?? DEFAULT_REPO; console.log(`Available emulator releases from ${repo}:\n`); - const lines = gh(["release", "list", "--repo", repo, "--limit", "20"]).split("\n").filter((l) => l.toLowerCase().includes("emulator")); + type Release = { tag_name: string, name: string | null, published_at: string | null, draft: boolean, prerelease: boolean }; + const releases = await ghApi(`/repos/${repo}/releases?per_page=50`); + const lines = releases + .filter((r) => (r.tag_name + " " + (r.name ?? 
"")).toLowerCase().includes("emulator")) + .slice(0, 20) + .map((r) => { + const status = r.draft ? "Draft" : r.prerelease ? "Pre-release" : "Latest"; + const date = r.published_at ? r.published_at.slice(0, 10) : ""; + return `${r.tag_name}\t${status}\t${date}`; + }); if (lines.length === 0) console.log("No emulator releases found."); else for (const line of lines) console.log(line); }); diff --git a/packages/stack-cli/src/lib/iso.test.ts b/packages/stack-cli/src/lib/iso.test.ts new file mode 100644 index 0000000000..91f069e7a5 --- /dev/null +++ b/packages/stack-cli/src/lib/iso.test.ts @@ -0,0 +1,259 @@ +import { describe, expect, it } from "vitest"; +import { buildIso, type IsoFile } from "./iso.js"; + +const SECTOR = 2048; + +// --- Test helpers: a minimal ISO 9660 parser, just enough to walk the +// directory records we produce so tests can assert the bytes we emitted really +// are addressable at the offsets claimed in the directory records. + +function readSector(iso: Buffer, sector: number): Buffer { + return iso.subarray(sector * SECTOR, (sector + 1) * SECTOR); +} + +function readVolumeDescriptor(iso: Buffer, sector: number): { type: number, id: string } { + const buf = readSector(iso, sector); + return { type: buf[0], id: buf.toString("ascii", 1, 6) }; +} + +type DirRecord = { + lenDr: number, + extentSector: number, + dataLength: number, + isDir: boolean, + fileId: Buffer, +}; + +function parseDirRecords(sector: Buffer): DirRecord[] { + const records: DirRecord[] = []; + let offset = 0; + while (offset < sector.length) { + const lenDr = sector[offset]; + if (lenDr === 0) break; + const extentSector = sector.readUInt32LE(offset + 2); + const dataLength = sector.readUInt32LE(offset + 10); + const flags = sector[offset + 25]; + const lenFi = sector[offset + 32]; + const fileId = sector.subarray(offset + 33, offset + 33 + lenFi); + records.push({ + lenDr, + extentSector, + dataLength, + isDir: (flags & 0x02) !== 0, + fileId: Buffer.from(fileId), + }); + offset 
+= lenDr; + } + return records; +} + +// Follow PVD → root dir → pull file bytes by ISO-9660 name ("NAME.EXT;1"). +function readIsoFile(iso: Buffer, isoName: string): Buffer | null { + const pvd = readSector(iso, 16); + const rootSector = pvd.readUInt32LE(156 + 2); + const rootRecords = parseDirRecords(readSector(iso, rootSector)); + const match = rootRecords.find((r) => r.fileId.toString("ascii") === isoName); + if (!match) return null; + const start = match.extentSector * SECTOR; + return iso.subarray(start, start + match.dataLength); +} + +// Same, but follow the Joliet SVD (so names are UCS-2 BE). +function readJolietFile(iso: Buffer, name: string): Buffer | null { + const svd = readSector(iso, 17); + if (svd[0] !== 2) return null; + const rootSector = svd.readUInt32LE(156 + 2); + const rootRecords = parseDirRecords(readSector(iso, rootSector)); + const expected = Buffer.alloc(name.length * 2); + for (let i = 0; i < name.length; i++) expected.writeUInt16BE(name.charCodeAt(i), i * 2); + const match = rootRecords.find((r) => r.fileId.equals(expected)); + if (!match) return null; + const start = match.extentSector * SECTOR; + return iso.subarray(start, start + match.dataLength); +} + +function sampleFile(name: string, size: number, byte = 0x41): IsoFile { + return { name, data: Buffer.alloc(size, byte) }; +} + +describe("buildIso — structural invariants", () => { + it("emits the ISO 9660 standard identifiers at sectors 16, 17, 18", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hi") }]); + expect(readVolumeDescriptor(iso, 16)).toEqual({ type: 1, id: "CD001" }); + expect(readVolumeDescriptor(iso, 17)).toEqual({ type: 2, id: "CD001" }); + expect(readVolumeDescriptor(iso, 18)).toEqual({ type: 0xff, id: "CD001" }); + }); + + it("stores the volume identifier verbatim in the PVD for blkid discovery", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const pvd = readSector(iso, 16); + 
expect(pvd.toString("ascii", 40, 40 + 8)).toBe("STACKCFG"); + }); + + it("stores the volume identifier in the Joliet SVD as UCS-2 BE", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const svd = readSector(iso, 17); + const ucs = svd.subarray(40, 40 + 16); + let decoded = ""; + for (let i = 0; i < ucs.length; i += 2) decoded += String.fromCharCode(ucs.readUInt16BE(i)); + expect(decoded).toBe("STACKCFG"); + }); + + it("sets the Joliet escape sequence %/E", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const svd = readSector(iso, 17); + expect(svd[88]).toBe(0x25); + expect(svd[89]).toBe(0x2f); + expect(svd[90]).toBe(0x45); + }); + + it("declares a volume space size equal to the emitted sector count", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hello world") }]); + const pvd = readSector(iso, 16); + const declared = pvd.readUInt32LE(80); + expect(iso.length).toBe(declared * SECTOR); + }); +}); + +describe("buildIso — file round-trip", () => { + it("makes files readable by ISO 9660 name", () => { + const iso = buildIso("STACKCFG", [ + { name: "runtime.env", data: Buffer.from("KEY=value\n") }, + { name: "base.env", data: Buffer.from("FOO=bar\n") }, + ]); + expect(readIsoFile(iso, "RUNTIME.ENV;1")?.toString()).toBe("KEY=value\n"); + expect(readIsoFile(iso, "BASE.ENV;1")?.toString()).toBe("FOO=bar\n"); + }); + + it("makes files readable by Joliet (lowercase) name", () => { + const iso = buildIso("STACKCFG", [ + { name: "runtime.env", data: Buffer.from("KEY=value\n") }, + { name: "base.env", data: Buffer.from("FOO=bar\n") }, + ]); + expect(readJolietFile(iso, "runtime.env")?.toString()).toBe("KEY=value\n"); + expect(readJolietFile(iso, "base.env")?.toString()).toBe("FOO=bar\n"); + }); + + it("preserves exact file contents byte-for-byte", () => { + const content = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x41, 0x42, 0x43]); + const iso = 
buildIso("STACKCFG", [{ name: "bin.dat", data: content }]); + expect(readJolietFile(iso, "bin.dat")?.equals(content)).toBe(true); + }); + + it("handles files whose length is exactly one sector", () => { + const content = Buffer.alloc(SECTOR, 0x37); + const iso = buildIso("STACKCFG", [{ name: "one.bin", data: content }]); + expect(readJolietFile(iso, "one.bin")?.equals(content)).toBe(true); + }); + + it("handles files that span multiple sectors", () => { + const content = Buffer.alloc(SECTOR * 3 + 17, 0x55); + const iso = buildIso("STACKCFG", [{ name: "big.bin", data: content }]); + expect(readJolietFile(iso, "big.bin")?.equals(content)).toBe(true); + }); + + it("keeps files byte-exact at the claimed extent sector across multi-file layouts", () => { + // Fingerprint each file so we can tell them apart even if extents shift. + const files: IsoFile[] = [ + { name: "alpha.bin", data: Buffer.alloc(SECTOR + 5, 0xaa) }, + { name: "beta.bin", data: Buffer.alloc(SECTOR * 2, 0xbb) }, + { name: "gamma.bin", data: Buffer.alloc(42, 0xcc) }, + ]; + const iso = buildIso("STACKCFG", files); + for (const f of files) { + expect(readJolietFile(iso, f.name)?.equals(f.data)).toBe(true); + } + }); +}); + +describe("buildIso — edge cases", () => { + it("handles empty files without misaligning subsequent file extents", () => { + // Regression: `padToSector(Buffer.alloc(0))` used to return a 0-byte + // buffer, but the layout reserved 1 sector for the empty file — the next + // file was then read from the empty file's reserved slot. + const files: IsoFile[] = [ + { name: "empty.txt", data: Buffer.alloc(0) }, + { name: "after.txt", data: Buffer.from("marker\n") }, + ]; + const iso = buildIso("STACKCFG", files); + expect(readJolietFile(iso, "empty.txt")?.length).toBe(0); + expect(readJolietFile(iso, "after.txt")?.toString()).toBe("marker\n"); + // And: the declared volume space size must cover every emitted byte. 
+ const pvd = readSector(iso, 16); + expect(iso.length).toBe(pvd.readUInt32LE(80) * SECTOR); + }); + + it("writes the exact file length in the directory record (not padded to sector)", () => { + const content = Buffer.from("abc"); + const iso = buildIso("STACKCFG", [{ name: "tiny.txt", data: content }]); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)); + const file = records.find((r) => !r.isDir); + expect(file?.dataLength).toBe(3); + }); + + it("places the root directory records for . and .. pointing at the root extent", () => { + const iso = buildIso("STACKCFG", [{ name: "x.txt", data: Buffer.from("1") }]); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)); + expect(records.length).toBeGreaterThanOrEqual(2); + expect(records[0].fileId.equals(Buffer.from([0x00]))).toBe(true); + expect(records[1].fileId.equals(Buffer.from([0x01]))).toBe(true); + expect(records[0].isDir).toBe(true); + expect(records[0].extentSector).toBe(rootSector); + expect(records[1].extentSector).toBe(rootSector); + }); + + it("truncates volume identifiers longer than 32 bytes rather than corrupting the PVD", () => { + const longId = "A".repeat(64); + const iso = buildIso(longId, [{ name: "x.txt", data: Buffer.from("1") }]); + const pvd = readSector(iso, 16); + expect(pvd.toString("ascii", 40, 40 + 32)).toBe("A".repeat(32)); + // Sector 17 should still be the Joliet SVD, not clobbered. + expect(pvd[881]).toBe(1); + expect(readVolumeDescriptor(iso, 17).type).toBe(2); + }); + + it("rejects an input set whose root directory record overflows one sector", () => { + // Each Joliet dir record for an N-char name is 33 + 2N + (2N even ? 1 : 0) + // ≈ 2N + 34 bytes. A sector is 2048. Twenty 30-char names (94 bytes each) + // → 1880 bytes plus "." + ".." (68) → fits. Eighty → well over a sector.
+ const many: IsoFile[] = Array.from({ length: 80 }, (_, i) => ({ + name: `file-${String(i).padStart(3, "0")}-padding-padding.bin`, + data: Buffer.from("x"), + })); + expect(() => buildIso("STACKCFG", many)).toThrow(/Root directory exceeds/); + }); + + it("produces a sector-aligned buffer regardless of file sizes", () => { + for (const size of [0, 1, SECTOR - 1, SECTOR, SECTOR + 1, SECTOR * 5 - 1]) { + const iso = buildIso("STACKCFG", [sampleFile("a.bin", size)]); + expect(iso.length % SECTOR).toBe(0); + } + }); +}); + +describe("buildIso — multiple file sector layout", () => { + it("assigns non-overlapping extents to all files", () => { + const files: IsoFile[] = [ + sampleFile("a.bin", 10, 0x01), + sampleFile("b.bin", SECTOR, 0x02), + sampleFile("c.bin", SECTOR * 2 + 500, 0x03), + sampleFile("d.bin", 1, 0x04), + ]; + const iso = buildIso("STACKCFG", files); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)).filter((r) => !r.isDir); + + // Extents must be strictly ordered and non-overlapping. + const sorted = [...records].sort((a, b) => a.extentSector - b.extentSector); + for (let i = 1; i < sorted.length; i++) { + const prev = sorted[i - 1]; + const prevEndSector = prev.extentSector + Math.max(1, Math.ceil(prev.dataLength / SECTOR)); + expect(sorted[i].extentSector).toBeGreaterThanOrEqual(prevEndSector); + } + }); +}); diff --git a/packages/stack-cli/src/lib/iso.ts b/packages/stack-cli/src/lib/iso.ts new file mode 100644 index 0000000000..b226af0bc7 --- /dev/null +++ b/packages/stack-cli/src/lib/iso.ts @@ -0,0 +1,403 @@ +// Minimal ISO 9660 + Joliet writer used to package the runtime config blob +// that the emulator VM mounts at boot via /dev/disk/by-label/STACKCFG. +// +// Replaces the host-side dependency on hdiutil/mkisofs/genisoimage. 
Only the +// subset of ECMA-119 needed for a single-level root directory of small UTF-8 +// text files is implemented: PVD + Joliet SVD + path tables + root dir + file +// data. Names are emitted in both ISO 9660 ("BASE.ENV;1") and Joliet +// (lower-case UCS-2) form so Linux mounts the Joliet view by default and the +// guest's `source /mnt/stack-runtime/runtime.env` works unchanged. + +import { writeFileSync } from "fs"; + +const SECTOR = 2048; + +function bothEndian32(n: number): Buffer { + const b = Buffer.alloc(8); + b.writeUInt32LE(n, 0); + b.writeUInt32BE(n, 4); + return b; +} + +function bothEndian16(n: number): Buffer { + const b = Buffer.alloc(4); + b.writeUInt16LE(n, 0); + b.writeUInt16BE(n, 2); + return b; +} + +function padString(s: string, len: number, fill = " "): Buffer { + const buf = Buffer.alloc(len, fill.charCodeAt(0)); + buf.write(s.slice(0, len), 0, "ascii"); + return buf; +} + +function ucs2BE(s: string): Buffer { + const buf = Buffer.alloc(s.length * 2); + for (let i = 0; i < s.length; i++) { + buf.writeUInt16BE(s.charCodeAt(i), i * 2); + } + return buf; +} + +function padUcs2BE(s: string, byteLen: number): Buffer { + const buf = Buffer.alloc(byteLen); + const wholeChars = Math.floor(byteLen / 2); + for (let i = 0; i < wholeChars; i++) { + buf.writeUInt16BE(i < s.length ? s.charCodeAt(i) : 0x0020, i * 2); + } + // Odd-length fields (e.g. 37-byte Copyright/Abstract/Bibliographic IDs) get + // a trailing space byte; spec allows either NUL or 0x20 padding. 
+ if (byteLen % 2 === 1) { + buf[byteLen - 1] = 0x20; + } + return buf; +} + +function dirRecordingDate(d: Date): Buffer { + const buf = Buffer.alloc(7); + buf[0] = d.getUTCFullYear() - 1900; + buf[1] = d.getUTCMonth() + 1; + buf[2] = d.getUTCDate(); + buf[3] = d.getUTCHours(); + buf[4] = d.getUTCMinutes(); + buf[5] = d.getUTCSeconds(); + buf[6] = 0; + return buf; +} + +function volumeDate(d: Date): Buffer { + const pad = (n: number, w: number) => String(n).padStart(w, "0"); + const s = + pad(d.getUTCFullYear(), 4) + + pad(d.getUTCMonth() + 1, 2) + + pad(d.getUTCDate(), 2) + + pad(d.getUTCHours(), 2) + + pad(d.getUTCMinutes(), 2) + + pad(d.getUTCSeconds(), 2) + + "00"; + const buf = Buffer.alloc(17); + buf.write(s, 0, 16, "ascii"); + buf[16] = 0; + return buf; +} + +const UNUSED_VOLUME_DATE = (() => { + const buf = Buffer.alloc(17, "0".charCodeAt(0)); + buf[16] = 0; + return buf; +})(); + +// Encodes an ISO 9660 file identifier ("FILENAME.EXT;1"). The name is +// uppercased here; pass the 8.3 name without the ";1" version suffix. +function isoFileIdentifier(name: string): Buffer { + const upper = name.toUpperCase(); + return Buffer.from(`${upper};1`, "ascii"); +} + +// Builds a single directory record. `idBytes` is the file identifier bytes +// (ASCII for ISO, UCS-2 BE for Joliet); for "." / ".." entries pass a single +// 0x00 / 0x01 byte as `idBytes`. +function buildDirRecord( + extentSector: number, + dataLength: number, + isDir: boolean, + recDate: Buffer, + idBytes: Buffer, +): Buffer { + const lenFi = idBytes.length; + const pad = lenFi % 2 === 0 ? 1 : 0; + const lenDr = 33 + lenFi + pad; + const buf = Buffer.alloc(lenDr); + buf[0] = lenDr; + buf[1] = 0; + bothEndian32(extentSector).copy(buf, 2); + bothEndian32(dataLength).copy(buf, 10); + recDate.copy(buf, 18); + buf[25] = isDir ?
0x02 : 0x00; + buf[26] = 0; + buf[27] = 0; + bothEndian16(1).copy(buf, 28); + buf[32] = lenFi; + idBytes.copy(buf, 33); + return buf; +} + +function buildRootDirEntries( + rootSector: number, + rootSize: number, + recDate: Buffer, + files: { idBytes: Buffer, sector: number, size: number }[], +): Buffer { + const records: Buffer[] = []; + records.push(buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00]))); + records.push(buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x01]))); + for (const f of files) { + records.push(buildDirRecord(f.sector, f.size, false, recDate, f.idBytes)); + } + + // Records may not span sector boundaries; pack them with sector padding. + const sectors: Buffer[] = []; + let current = Buffer.alloc(0); + for (const r of records) { + if (current.length + r.length > SECTOR) { + sectors.push(Buffer.concat([current, Buffer.alloc(SECTOR - current.length)])); + current = Buffer.alloc(0); + } + current = Buffer.concat([current, r]); + } + if (current.length > 0) { + sectors.push(Buffer.concat([current, Buffer.alloc(SECTOR - current.length)])); + } + return Buffer.concat(sectors); +} + +// Single-entry path table for the root directory. Used for both L (LE) and M +// (BE) tables; pass writeUInt32LE/BE accordingly. +function buildPathTable(rootSector: number, byteOrder: "LE" | "BE"): Buffer { + const buf = Buffer.alloc(10); + buf[0] = 1; // LEN_DI + buf[1] = 0; // EAR length + if (byteOrder === "LE") { + buf.writeUInt32LE(rootSector, 2); + buf.writeUInt16LE(1, 6); + } else { + buf.writeUInt32BE(rootSector, 2); + buf.writeUInt16BE(1, 6); + } + buf[8] = 0; // root identifier + buf[9] = 0; // pad + return buf; +} + +function padToSector(buf: Buffer): Buffer { + const rem = buf.length % SECTOR; + if (rem === 0) return buf; + return Buffer.concat([buf, Buffer.alloc(SECTOR - rem)]); +} + +// Build a Volume Descriptor (PVD or Joliet SVD). 
`joliet` switches volume-name +// fields to UCS-2 BE and sets the Joliet escape sequence. +function buildVolumeDescriptor(opts: { + joliet: boolean, + volumeId: string, + volumeSpaceSize: number, + pathTableSize: number, + lPathSector: number, + mPathSector: number, + rootDirRecord: Buffer, + date: Buffer, +}): Buffer { + const buf = Buffer.alloc(SECTOR); + buf[0] = opts.joliet ? 2 : 1; + buf.write("CD001", 1, 5, "ascii"); + buf[6] = 1; + buf[7] = 0; + + // System Identifier (32 bytes) + if (opts.joliet) { + padUcs2BE("", 32).copy(buf, 8); + } else { + padString("", 32).copy(buf, 8); + } + + // Volume Identifier (32 bytes) — must be "STACKCFG" so udev exposes it as + // /dev/disk/by-label/STACKCFG. blkid reads from PVD by default but Joliet + // takes precedence when both are present. + if (opts.joliet) { + padUcs2BE(opts.volumeId, 32).copy(buf, 40); + } else { + padString(opts.volumeId, 32).copy(buf, 40); + } + + bothEndian32(opts.volumeSpaceSize).copy(buf, 80); + + if (opts.joliet) { + // Escape sequence for UCS-2 Level 3 ("%/E") at offset 88 (32 bytes). + buf[88] = 0x25; + buf[89] = 0x2f; + buf[90] = 0x45; + } + + bothEndian16(1).copy(buf, 120); // Volume Set Size + bothEndian16(1).copy(buf, 124); // Volume Sequence Number + bothEndian16(SECTOR).copy(buf, 128); // Logical Block Size + bothEndian32(opts.pathTableSize).copy(buf, 132); + buf.writeUInt32LE(opts.lPathSector, 140); + buf.writeUInt32LE(0, 144); // optional L + buf.writeUInt32BE(opts.mPathSector, 148); + buf.writeUInt32BE(0, 152); // optional M + + opts.rootDirRecord.copy(buf, 156); + + const padFn = opts.joliet + ? 
(s: string, n: number) => padUcs2BE(s, n) + : (s: string, n: number) => padString(s, n); + + padFn("", 128).copy(buf, 190); // Volume Set Identifier + padFn("", 128).copy(buf, 318); // Publisher Identifier + padFn("", 128).copy(buf, 446); // Data Preparer Identifier + padFn("", 128).copy(buf, 574); // Application Identifier + padFn("", 37).copy(buf, 702); // Copyright File Identifier + padFn("", 37).copy(buf, 739); // Abstract File Identifier + padFn("", 37).copy(buf, 776); // Bibliographic File Identifier + + opts.date.copy(buf, 813); // Creation + opts.date.copy(buf, 830); // Modification + UNUSED_VOLUME_DATE.copy(buf, 847); // Expiration + UNUSED_VOLUME_DATE.copy(buf, 864); // Effective + + buf[881] = 1; // File Structure Version + return buf; +} + +function buildVolumeDescriptorTerminator(): Buffer { + const buf = Buffer.alloc(SECTOR); + buf[0] = 0xff; + buf.write("CD001", 1, 5, "ascii"); + buf[6] = 1; + return buf; +} + +// Builds the 34-byte root directory record that lives inside the volume +// descriptor (BP 157-190 of PVD/SVD). Identical layout to a regular directory +// record but identifier is the single byte 0x00. +function buildRootDirRecordInVD(rootSector: number, rootSize: number, recDate: Buffer): Buffer { + return buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00])); +} + +export type IsoFile = { name: string, data: Buffer }; + +export function buildIso(volumeId: string, files: IsoFile[]): Buffer { + const date = new Date(); + const recDate = dirRecordingDate(date); + const volDateBuf = volumeDate(date); + + // Compute per-file directory record sizes for both views. + const isoEntries = files.map((f) => ({ + file: f, + idBytes: isoFileIdentifier(f.name), + })); + const jolietEntries = files.map((f) => ({ + file: f, + idBytes: ucs2BE(f.name), + })); + + // We need root sector + size before we know file sectors — but file sectors + // depend only on the root dir size, which depends only on the file count. 
+ // Compute the root dir buffer twice if needed (sizes are stable since they + // depend only on identifier bytes, not on file extents). + const dirRecLen = (lenFi: number) => 33 + lenFi + (lenFi % 2 === 0 ? 1 : 0); + const isoRootSize = 34 + 34 + isoEntries.reduce((acc, e) => acc + dirRecLen(e.idBytes.length), 0); + const jolietRootSize = 34 + 34 + jolietEntries.reduce((acc, e) => acc + dirRecLen(e.idBytes.length), 0); + if (isoRootSize > SECTOR || jolietRootSize > SECTOR) { + throw new Error(`Root directory exceeds ${SECTOR} bytes; multi-sector root not supported.`); + } + + // Sector layout. + const sysAreaSectors = 16; + const pvdSector = sysAreaSectors; + const svdSector = pvdSector + 1; + const termSector = svdSector + 1; + const isoLPathSector = termSector + 1; + const isoMPathSector = isoLPathSector + 1; + const jolietLPathSector = isoMPathSector + 1; + const jolietMPathSector = jolietLPathSector + 1; + const isoRootSector = jolietMPathSector + 1; + const jolietRootSector = isoRootSector + 1; + let nextSector = jolietRootSector + 1; + + const fileLayout = files.map((f) => { + const sector = nextSector; + const sectors = Math.max(1, Math.ceil(f.data.length / SECTOR)); + nextSector += sectors; + return { file: f, sector, size: f.data.length }; + }); + + const totalSectors = nextSector; + const pathTableSize = 10; + + const isoRootDirRecordVD = buildRootDirRecordInVD(isoRootSector, SECTOR, recDate); + const jolietRootDirRecordVD = buildRootDirRecordInVD(jolietRootSector, SECTOR, recDate); + + const pvd = buildVolumeDescriptor({ + joliet: false, + volumeId, + volumeSpaceSize: totalSectors, + pathTableSize, + lPathSector: isoLPathSector, + mPathSector: isoMPathSector, + rootDirRecord: isoRootDirRecordVD, + date: volDateBuf, + }); + + const svd = buildVolumeDescriptor({ + joliet: true, + volumeId, + volumeSpaceSize: totalSectors, + pathTableSize, + lPathSector: jolietLPathSector, + mPathSector: jolietMPathSector, + rootDirRecord: jolietRootDirRecordVD, + date: 
volDateBuf, + }); + + const term = buildVolumeDescriptorTerminator(); + const isoLPath = padToSector(buildPathTable(isoRootSector, "LE")); + const isoMPath = padToSector(buildPathTable(isoRootSector, "BE")); + const jolietLPath = padToSector(buildPathTable(jolietRootSector, "LE")); + const jolietMPath = padToSector(buildPathTable(jolietRootSector, "BE")); + + const isoRoot = buildRootDirEntries( + isoRootSector, + SECTOR, + recDate, + isoEntries.map((e, i) => ({ + idBytes: e.idBytes, + sector: fileLayout[i].sector, + size: fileLayout[i].size, + })), + ); + const jolietRoot = buildRootDirEntries( + jolietRootSector, + SECTOR, + recDate, + jolietEntries.map((e, i) => ({ + idBytes: e.idBytes, + sector: fileLayout[i].sector, + size: fileLayout[i].size, + })), + ); + + // Each file must occupy the exact number of sectors the layout reserved for + // it. An empty file reserves 1 sector (via Math.max(1, …)) but + // padToSector(Buffer.alloc(0)) returns 0 bytes — that would desync every + // subsequent file's extent. Explicitly pad to the reserved size instead. 
+ const fileBuffers = fileLayout.map((f) => { + const reservedSectors = Math.max(1, Math.ceil(f.file.data.length / SECTOR)); + const reservedBytes = reservedSectors * SECTOR; + if (f.file.data.length === reservedBytes) return f.file.data; + const out = Buffer.alloc(reservedBytes); + f.file.data.copy(out, 0); + return out; + }); + + return Buffer.concat([ + Buffer.alloc(sysAreaSectors * SECTOR), + pvd, + svd, + term, + isoLPath, + isoMPath, + jolietLPath, + jolietMPath, + isoRoot, + jolietRoot, + ...fileBuffers, + ]); +} + +export function writeIso(path: string, volumeId: string, files: IsoFile[]): void { + const buf = buildIso(volumeId, files); + writeFileSync(path, buf); +} diff --git a/packages/stack-cli/vitest.config.ts b/packages/stack-cli/vitest.config.ts new file mode 100644 index 0000000000..0caeccbe44 --- /dev/null +++ b/packages/stack-cli/vitest.config.ts @@ -0,0 +1,19 @@ +import { defineConfig, mergeConfig } from 'vitest/config'; +import sharedConfig from '../../vitest.shared'; + +export default mergeConfig( + sharedConfig, + defineConfig({ + test: { + // Override the shared `maxWorkers: 8` — with it set, tinypool defaults + // minThreads to the host's available parallelism, producing + // "minThreads/maxThreads must not conflict" on machines with >8 cores. 
+ poolOptions: { + threads: { + minThreads: 1, + maxThreads: 4, + }, + }, + }, + }), +); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index dd890a4bd2..5c2364de0a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -737,7 +737,7 @@ importers: version: 1.166.6(crossws@0.4.4(srvx@0.8.16)) nitro: specifier: ^3.0.0 - version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2) + version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2) react: specifier: 19.2.1 version: 19.2.1 @@ -950,7 +950,7 @@ importers: devDependencies: mint: specifier: ^4.2.487 - version: 4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) + version: 4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) examples/cjs-test: dependencies: @@ -1498,10 +1498,10 @@ importers: version: link:../../packages/stack '@supabase/ssr': specifier: latest - version: 0.10.0(@supabase/supabase-js@2.101.1) + version: 0.10.0(@supabase/supabase-js@2.102.1) '@supabase/supabase-js': specifier: latest - version: 2.101.1 + version: 2.102.1 jose: specifier: ^5.2.2 version: 5.6.3 @@ -2024,6 +2024,9 @@ importers: commander: specifier: ^13.1.0 version: 13.1.0 + extract-zip: + specifier: ^2.0.1 + version: 2.0.1 jiti: specifier: ^2.4.2 version: 2.6.1 @@ -9760,23 +9763,23 @@ packages: 
resolution: {integrity: sha512-SXuhqhuR5FXaYgKTXzZJeqtVA6JKb9IZWaGeEUxHHiOcFy2p51wccO72bYpXwoK4D5pzQOIYLTuAc7etxyMmwg==} engines: {node: '>=12.16'} - '@supabase/auth-js@2.101.1': - resolution: {integrity: sha512-Kd0Wey+RkFHgyVep7adS6UOE2pN6MJ3mZ32PAXSvfw6IjUkFRC7IQpdZZjUOcUe5pXr1ejufCRgF6lsGINe4Tw==} + '@supabase/auth-js@2.102.1': + resolution: {integrity: sha512-2uH2WB0H98TOGDtaFWhxIcR42Dro/VB7VDZanz/4bVJsqioIue1m3TUqu3xciDm2W9r+1LXQvYNsYbQfWmD+uQ==} engines: {node: '>=20.0.0'} - '@supabase/functions-js@2.101.1': - resolution: {integrity: sha512-OZWU7YtaG+NNNFZK8p/FuJ6gpq7pFyrG2fLOopP73HAIDHDGpOttPJapvO8ADu3RkqfQfkwrB354vPkSBbZ20A==} + '@supabase/functions-js@2.102.1': + resolution: {integrity: sha512-UcrcKTPnAIo+Yp9Jjq9XXwFbsmgRYY637mwka9ZjmTIWcX/xr1pote4OVvaGQycVY1KTiQgjMvpC0Q0yJhRq3w==} engines: {node: '>=20.0.0'} '@supabase/phoenix@0.4.0': resolution: {integrity: sha512-RHSx8bHS02xwfHdAbX5Lpbo6PXbgyf7lTaXTlwtFDPwOIw64NnVRwFAXGojHhjtVYI+PEPNSWwkL90f4agN3bw==} - '@supabase/postgrest-js@2.101.1': - resolution: {integrity: sha512-UW1RajH5jbZoK+ldAJ1I6VZ+HWwZ2oaKjEQ6Gn+AQ67CHQVxGl8wNQoLYyumbyaExm41I+wn7arulcY1eHeZJw==} + '@supabase/postgrest-js@2.102.1': + resolution: {integrity: sha512-InLvXKAYf8BIqiv9jWOYudWB3rU8A9uMbcip5BQ5sLLNPrbO1Ekkr79OvlhZBgMNSppxVyC7wPPGzLxMcTZhlA==} engines: {node: '>=20.0.0'} - '@supabase/realtime-js@2.101.1': - resolution: {integrity: sha512-Oa6dno0OB9I+hv5do5zsZHbFu41ViZnE9IWjmkeeF/8fPmB5fWoHGqeTYEC3/0DAgtpUoFJa4FpvzFH0SBHo1Q==} + '@supabase/realtime-js@2.102.1': + resolution: {integrity: sha512-h2fCumib/v6u7XMwSPgxnpfimjX4xCEayUHrxWLC7UurfQjUZJ0pmJDgm6yj80DnUerxuulRghwm5zXYysFG/Q==} engines: {node: '>=20.0.0'} '@supabase/ssr@0.10.0': @@ -9784,12 +9787,12 @@ packages: peerDependencies: '@supabase/supabase-js': ^2.100.1 - '@supabase/storage-js@2.101.1': - resolution: {integrity: sha512-WhTaUOBgeEvnKLy95Cdlp6+D5igSF/65yC727w1olxbet5nzUvMlajKUWyzNtQu2efrz2cQ7FcdVBdQqgT9YKQ==} + '@supabase/storage-js@2.102.1': + resolution: {integrity: 
sha512-eCL9T4Xpe40nmKlkUJ7Zq/hk34db1xPiT0WL3Iv5MbJqHuCAe5TxhV8Rjqd6DNZrzjtfYObZtYl9jKJaHrivqw==} engines: {node: '>=20.0.0'} - '@supabase/supabase-js@2.101.1': - resolution: {integrity: sha512-Jnhm3LfuACwjIzvk2pfUbGQn7pa7hi6MFzfSyPrRYWVCCu69RPLCFyHSBl7HSBwadbQ3UZOznnD3gPca3ePrRA==} + '@supabase/supabase-js@2.102.1': + resolution: {integrity: sha512-bChxPVeLDnYN9M2d/u4fXsvylwSQG5grAl+HN8f+ZD9a9PuVU+Ru+xGmEsk+b9Iz3rJC9ZQnQUJYQ28fApdWYA==} engines: {node: '>=20.0.0'} '@sveltejs/sv-utils@0.0.3': @@ -11267,6 +11270,7 @@ packages: basic-ftp@5.2.0: resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==} engines: {node: '>=10.0.0'} + deprecated: Security vulnerability fixed in 5.2.1, please upgrade bcrypt@6.0.0: resolution: {integrity: sha512-cU8v/EGSrnH+HnxV2z0J7/blxH8gq7Xh2JFT6Aroax7UohdmiJJlxApMxtKfuI7z68NvvVcmR78k2LbT6efhRg==} @@ -13398,6 +13402,7 @@ packages: freestyle-sandboxes@0.1.6: resolution: {integrity: sha512-zfyJy+DgmheFjCAPYMklo7rpzvuxNP46rB0a9WfNBEmitYGE23nlbjyTy8qdrmVuCVCoMIDQQzzJRkyuh0Szqg==} + deprecated: This package has been deprecated. Please use freestyle instead. 
fresh@0.5.2: resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==} @@ -22478,16 +22483,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/checkbox@4.3.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/confirm@5.1.21(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22495,13 +22490,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/confirm@5.1.21(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/core@10.3.2(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22515,19 +22503,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/core@10.3.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - cli-width: 4.1.0 - mute-stream: 2.0.0 - signal-exit: 4.1.0 - wrap-ansi: 6.2.0 - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/editor@4.2.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22536,14 +22511,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/editor@4.2.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/external-editor': 1.0.3(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/expand@4.0.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22552,14 +22519,6 @@ snapshots: 
optionalDependencies: '@types/node': 20.17.6 - '@inquirer/expand@4.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/external-editor@1.0.3(@types/node@20.17.6)': dependencies: chardet: 2.1.1 @@ -22567,13 +22526,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/external-editor@1.0.3(@types/node@24.9.2)': - dependencies: - chardet: 2.1.1 - iconv-lite: 0.7.0 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/figures@1.0.15': {} '@inquirer/figures@1.0.3': {} @@ -22585,13 +22537,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/input@4.3.1(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/number@3.0.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22599,13 +22544,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/number@3.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/password@4.0.23(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22614,14 +22552,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/password@4.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/prompts@7.10.1(@types/node@20.17.6)': dependencies: '@inquirer/checkbox': 4.3.2(@types/node@20.17.6) @@ -22637,35 +22567,20 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - 
'@inquirer/prompts@7.10.1(@types/node@24.9.2)': - dependencies: - '@inquirer/checkbox': 4.3.2(@types/node@24.9.2) - '@inquirer/confirm': 5.1.21(@types/node@24.9.2) - '@inquirer/editor': 4.2.23(@types/node@24.9.2) - '@inquirer/expand': 4.0.23(@types/node@24.9.2) - '@inquirer/input': 4.3.1(@types/node@24.9.2) - '@inquirer/number': 3.0.23(@types/node@24.9.2) - '@inquirer/password': 4.0.23(@types/node@24.9.2) - '@inquirer/rawlist': 4.1.11(@types/node@24.9.2) - '@inquirer/search': 3.2.2(@types/node@24.9.2) - '@inquirer/select': 4.4.2(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - - '@inquirer/prompts@7.9.0(@types/node@24.9.2)': + '@inquirer/prompts@7.9.0(@types/node@20.17.6)': dependencies: - '@inquirer/checkbox': 4.3.2(@types/node@24.9.2) - '@inquirer/confirm': 5.1.21(@types/node@24.9.2) - '@inquirer/editor': 4.2.23(@types/node@24.9.2) - '@inquirer/expand': 4.0.23(@types/node@24.9.2) - '@inquirer/input': 4.3.1(@types/node@24.9.2) - '@inquirer/number': 3.0.23(@types/node@24.9.2) - '@inquirer/password': 4.0.23(@types/node@24.9.2) - '@inquirer/rawlist': 4.1.11(@types/node@24.9.2) - '@inquirer/search': 3.2.2(@types/node@24.9.2) - '@inquirer/select': 4.4.2(@types/node@24.9.2) + '@inquirer/checkbox': 4.3.2(@types/node@20.17.6) + '@inquirer/confirm': 5.1.21(@types/node@20.17.6) + '@inquirer/editor': 4.2.23(@types/node@20.17.6) + '@inquirer/expand': 4.0.23(@types/node@20.17.6) + '@inquirer/input': 4.3.1(@types/node@20.17.6) + '@inquirer/number': 3.0.23(@types/node@20.17.6) + '@inquirer/password': 4.0.23(@types/node@20.17.6) + '@inquirer/rawlist': 4.1.11(@types/node@20.17.6) + '@inquirer/search': 3.2.2(@types/node@20.17.6) + '@inquirer/select': 4.4.2(@types/node@20.17.6) optionalDependencies: - '@types/node': 24.9.2 + '@types/node': 20.17.6 '@inquirer/rawlist@4.1.11(@types/node@20.17.6)': dependencies: @@ -22675,14 +22590,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/rawlist@4.1.11(@types/node@24.9.2)': - dependencies: - 
'@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/search@3.2.2(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22692,15 +22599,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/search@3.2.2(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/select@4.4.2(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22711,24 +22609,10 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/select@4.4.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/type@3.0.10(@types/node@20.17.6)': optionalDependencies: '@types/node': 20.17.6 - '@inquirer/type@3.0.10(@types/node@24.9.2)': - optionalDependencies: - '@types/node': 24.9.2 - '@isaacs/cliui@8.0.2': dependencies: string-width: 5.1.2 @@ -22866,9 +22750,9 @@ snapshots: dependencies: langium: 3.3.1 - '@mintlify/cli@4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0)': + '@mintlify/cli@4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0)': dependencies: - '@inquirer/prompts': 
7.9.0(@types/node@24.9.2) + '@inquirer/prompts': 7.9.0(@types/node@20.17.6) '@mintlify/common': 1.0.835(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) '@mintlify/link-rot': 3.0.1010(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) '@mintlify/prebuild': 1.0.977(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) @@ -22881,7 +22765,7 @@ snapshots: front-matter: 4.0.2 fs-extra: 11.2.0 ink: 6.3.0(@types/react@18.3.12)(react@19.2.3) - inquirer: 12.3.0(@types/node@24.9.2) + inquirer: 12.3.0(@types/node@20.17.6) js-yaml: 4.1.0 mdast-util-mdx-jsx: 3.2.0 open: 8.4.2 @@ -29249,21 +29133,21 @@ snapshots: '@stripe/stripe-js@7.7.0': {} - '@supabase/auth-js@2.101.1': + '@supabase/auth-js@2.102.1': dependencies: tslib: 2.8.1 - '@supabase/functions-js@2.101.1': + '@supabase/functions-js@2.102.1': dependencies: tslib: 2.8.1 '@supabase/phoenix@0.4.0': {} - '@supabase/postgrest-js@2.101.1': + '@supabase/postgrest-js@2.102.1': dependencies: tslib: 2.8.1 - '@supabase/realtime-js@2.101.1': + '@supabase/realtime-js@2.102.1': dependencies: '@supabase/phoenix': 0.4.0 '@types/ws': 8.18.1 @@ -29273,23 +29157,23 @@ snapshots: - bufferutil - utf-8-validate - '@supabase/ssr@0.10.0(@supabase/supabase-js@2.101.1)': + '@supabase/ssr@0.10.0(@supabase/supabase-js@2.102.1)': dependencies: - '@supabase/supabase-js': 2.101.1 + '@supabase/supabase-js': 2.102.1 cookie: 1.0.2 - '@supabase/storage-js@2.101.1': 
+ '@supabase/storage-js@2.102.1': dependencies: iceberg-js: 0.8.1 tslib: 2.8.1 - '@supabase/supabase-js@2.101.1': + '@supabase/supabase-js@2.102.1': dependencies: - '@supabase/auth-js': 2.101.1 - '@supabase/functions-js': 2.101.1 - '@supabase/postgrest-js': 2.101.1 - '@supabase/realtime-js': 2.101.1 - '@supabase/storage-js': 2.101.1 + '@supabase/auth-js': 2.102.1 + '@supabase/functions-js': 2.102.1 + '@supabase/postgrest-js': 2.102.1 + '@supabase/realtime-js': 2.102.1 + '@supabase/storage-js': 2.102.1 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -30017,6 +29901,7 @@ snapshots: '@types/node@24.9.2': dependencies: undici-types: 7.16.0 + optional: true '@types/nodemailer@6.4.15': dependencies: @@ -33134,7 +33019,7 @@ snapshots: debug: 4.4.3 enhanced-resolve: 5.17.1 eslint: 8.57.1 - eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1) + eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) fast-glob: 3.3.3 get-tsconfig: 4.8.1 is-bun-module: 1.2.1 @@ -33177,7 +33062,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1): + 
eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): dependencies: debug: 3.2.7 optionalDependencies: @@ -33255,7 +33140,7 @@ snapshots: doctrine: 2.1.0 eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1) + eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) hasown: 2.0.2 is-core-module: 2.15.1 is-glob: 4.0.3 @@ -35159,12 +35044,12 @@ snapshots: react: 19.2.3 react-dom: 19.2.3(react@19.2.3) - inquirer@12.3.0(@types/node@24.9.2): + inquirer@12.3.0(@types/node@20.17.6): dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/prompts': 7.10.1(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - '@types/node': 24.9.2 + '@inquirer/core': 10.3.2(@types/node@20.17.6) + '@inquirer/prompts': 7.10.1(@types/node@20.17.6) + '@inquirer/type': 3.0.10(@types/node@20.17.6) + '@types/node': 20.17.6 ansi-escapes: 4.3.2 mute-stream: 2.0.0 run-async: 3.0.0 @@ -36641,9 +36526,9 @@ snapshots: dependencies: minipass: 7.1.2 - 
mint@4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0): + mint@4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0): dependencies: - '@mintlify/cli': 4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) + '@mintlify/cli': 4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) transitivePeerDependencies: - '@radix-ui/react-popover' - '@types/node' @@ -37086,7 +36971,7 @@ snapshots: jsonpath-plus: 10.4.0 lodash.topath: 4.5.2 - nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2): + nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2): dependencies: consola: 3.4.2 cookie-es: 2.0.0 @@ -37106,7 +36991,6 @@ snapshots: unenv: 2.0.0-rc.21 unstorage: 2.0.0-alpha.3(chokidar@4.0.3)(db0@0.3.4(@electric-sql/pglite@0.3.2)(mysql2@3.15.3))(lru-cache@11.2.2)(ofetch@1.5.1) optionalDependencies: - rolldown: 1.0.0-rc.3 
vite: 7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0) xml2js: 0.6.2 transitivePeerDependencies: @@ -40784,7 +40668,8 @@ snapshots: undici-types@6.21.0: {} - undici-types@7.16.0: {} + undici-types@7.16.0: + optional: true undici@6.19.8: {} From 6021a04bdefd45c3a36b4ff7d9cdb6233c1fbad0 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:28:51 -0700 Subject: [PATCH 04/25] build QEMU 10.2.2 from source in CI for mapped-ram support Ubuntu 24.04 (ubicloud-standard-8) ships QEMU 8.2, which predates the mapped-ram migration capability used by the fast-resume snapshot path. Compile 10.2.2 once per runner image and cache the resulting /opt/qemu so subsequent runs are fast. --- .github/workflows/qemu-emulator-build.yaml | 76 ++++++++++++++++++++-- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 4bb738124d..9d1078f0c6 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -66,10 +66,48 @@ jobs: node-version: 22 cache: pnpm - - name: Install QEMU dependencies + - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 zstd + # qemu-utils gives us qemu-img; qemu-efi-aarch64 provides the arm64 + # UEFI firmware. The actual qemu-system-* binaries come from the + # source build below — Ubuntu 24.04 ships QEMU 8.2 which predates + # the mapped-ram migration capability we rely on. + sudo apt-get install -y qemu-utils qemu-efi-aarch64 socat genisoimage zstd \ + ninja-build pkg-config python3-venv \ + libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev + + # QEMU 10.2.2 is required for the mapped-ram + multifd migration path + # used by the fast-resume snapshot. 
Cache the compiled prefix so CI + # only pays the ~5-8 min build cost once per runner image. + - name: Restore QEMU 10.2.2 cache + id: qemu-cache + uses: actions/cache@v4 + with: + path: /opt/qemu + key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1 + + - name: Build QEMU 10.2.2 from source + if: steps.qemu-cache.outputs.cache-hit != 'true' + run: | + set -euxo pipefail + curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz + mkdir -p /tmp/qemu-src + tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1 + cd /tmp/qemu-src + ./configure --prefix=/opt/qemu \ + --target-list=x86_64-softmmu,aarch64-softmmu \ + --enable-kvm --enable-slirp --enable-tcg \ + --disable-docs --disable-gtk --disable-sdl --disable-vnc \ + --disable-guest-agent --disable-tools + make -j"$(nproc)" + sudo make install + + - name: Put QEMU 10.2.2 on PATH + run: | + echo "/opt/qemu/bin" >> "$GITHUB_PATH" + /opt/qemu/bin/qemu-system-x86_64 --version + /opt/qemu/bin/qemu-system-aarch64 --version - name: Enable KVM access run: | @@ -165,10 +203,40 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install QEMU dependencies + - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-utils socat zstd + sudo apt-get install -y qemu-utils socat zstd \ + ninja-build pkg-config python3-venv \ + libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev + + - name: Restore QEMU 10.2.2 cache + id: qemu-cache + uses: actions/cache@v4 + with: + path: /opt/qemu + key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1 + + - name: Build QEMU 10.2.2 from source + if: steps.qemu-cache.outputs.cache-hit != 'true' + run: | + set -euxo pipefail + curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz + mkdir -p /tmp/qemu-src + tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1 + cd /tmp/qemu-src + ./configure --prefix=/opt/qemu \ + --target-list=x86_64-softmmu,aarch64-softmmu \ + 
--enable-kvm --enable-slirp --enable-tcg \ + --disable-docs --disable-gtk --disable-sdl --disable-vnc \ + --disable-guest-agent --disable-tools + make -j"$(nproc)" + sudo make install + + - name: Put QEMU 10.2.2 on PATH + run: | + echo "/opt/qemu/bin" >> "$GITHUB_PATH" + /opt/qemu/bin/qemu-system-x86_64 --version - uses: pnpm/action-setup@v4 with: From 0c0d726b2859fa6447fd395ba51ee6542dde03aa Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:50:55 -0700 Subject: [PATCH 05/25] build stack-cli's workspace deps in emulator CI Switch the CLI build step from `pnpm --filter @stackframe/stack-cli run build` to `turbo run build --filter=@stackframe/stack-cli...` so that stack-cli's workspace dependencies (@stackframe/js and @stackframe/stack-shared) also get compiled to their dist/ outputs. Without them, `node dist/index.js` fails with ERR_MODULE_NOT_FOUND at import time. --- .github/workflows/qemu-emulator-build.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 9d1078f0c6..3ed56b1472 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -138,8 +138,11 @@ jobs: - name: Build stack-cli (for emulator CLI) if: matrix.arch == 'amd64' run: | - pnpm install --frozen-lockfile --filter @stackframe/stack-cli... - pnpm --filter @stackframe/stack-cli run build + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' + # Turbo's trailing `...` filter builds stack-cli AND its workspace + # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli + # imports them at runtime from their dist/ outputs. + pnpm exec turbo run build --filter='@stackframe/stack-cli...' - name: Start emulator and verify if: matrix.arch == 'amd64' @@ -249,8 +252,11 @@ jobs: - name: Install stack-cli deps + build run: | - pnpm install --frozen-lockfile --filter @stackframe/stack-cli... 
- pnpm --filter @stackframe/stack-cli run build + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' + # Turbo's trailing `...` filter builds stack-cli AND its workspace + # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli + # imports them at runtime from their dist/ outputs. + pnpm exec turbo run build --filter='@stackframe/stack-cli...' - name: Download built image uses: actions/download-artifact@v4 From b03486e1c800dda9163438fe2a2b72bab6dee92b Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:59:38 -0700 Subject: [PATCH 06/25] fix emulator pull --pr/--run snapshot detection First downloadArtifactByName already extracts both qcow2 and savevm.zst from the single qemu-emulator-${arch} artifact; the second lookup for a nonexistent -savevm artifact always failed and produced a misleading 'fast-start disabled' message. --- packages/stack-cli/src/commands/emulator.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 5967a11038..00e8fdae60 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -518,17 +518,13 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - // Snapshot artifact is optional — older CI builds may not produce it. - let snapshotDownloaded = false; - try { - snapshotDownloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}-savevm`, imageDir); - } catch (err) { - console.log(`Snapshot artifact unavailable for run ${runId}: ${err instanceof Error ? 
err.message : err}`); - } - if (snapshotDownloaded && existsSync(snapshotDest)) { + // CI publishes both files inside the single qemu-emulator-${arch} + // artifact, so the first download already extracts the snapshot when + // present. Older builds may not include it. + if (existsSync(snapshotDest)) { console.log(`Downloaded: ${snapshotDest}`); - } else if (!snapshotDownloaded) { - console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); + } else { + console.log(`Snapshot not present in artifact for run ${runId}; fast-start disabled.`); } } else { await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); From 0b3a9cfaccaf8c4efab6c222113f3253b2f21950 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:07:34 -0700 Subject: [PATCH 07/25] fix sentinel marker path in docker/server entrypoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docker/server image runs as the unprivileged `node` user, which cannot write to /var/run. With `set -e` at the top of the script, the failed `touch` aborted execution after sentinel replacement but before the backend/dashboard were started — the Check server health CI step then saw connection refused on ports 8101/8102. Move the marker into $WORK_DIR (which is already created and owned by the running user). The emulator snapshot-resume path still benefits: the marker persists across supervisorctl restarts because $WORK_DIR lives on the container filesystem. --- docker/server/entrypoint.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 975c18975e..05072c330d 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -144,8 +144,10 @@ fi # The full-tree sentinel scan is expensive (several seconds over the whole built # app tree). 
On a fast-restart — triggered by the emulator snapshot rotation # path — the placeholders have already been sed-replaced by rotate-secrets, -# and no new sentinels need substitution. Skip the scan in that case. -SENTINEL_MARKER=/var/run/stack-local-sentinels-replaced +# and no new sentinels need substitution. Skip the scan in that case. Marker +# lives in WORK_DIR because the docker/server image runs as the unprivileged +# `node` user and cannot write to /var/run. +SENTINEL_MARKER="$WORK_DIR/.stack-sentinels-replaced" if [ -f "$SENTINEL_MARKER" ]; then echo "Sentinels already replaced on a previous start; skipping scan." else @@ -182,7 +184,7 @@ else # Now replace the sentinel with the (properly escaped) value in all files in the working directory. find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} + done - mkdir -p "$(dirname "$SENTINEL_MARKER")" && touch "$SENTINEL_MARKER" + touch "$SENTINEL_MARKER" fi # ============= START BACKEND AND DASHBOARD ============= From 2c8ad4c77a9588dad508351b4b1e7998a0f2aa9c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:30:21 -0700 Subject: [PATCH 08/25] address unresolved PR review comments on snapshot resume path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - stop_vm no longer deletes runtime-config.iso; the CLI owns its lifecycle and the snapshot → cold-boot fallback needs it preserved (cmd_reset still wipes RUN_DIR for a full reset). Also sweeps qga.sock. - Write internal-pck to \$VM_DIR on the host in snapshot mode. Cold boot publishes this via virtfs/9p; snapshot mode drops virtfs, so --config-file flows would otherwise hang. Handles both the rotation path (fresh PCK) and EMULATOR_NO_ROTATION (placeholder PCK). - Pin RAM in snapshot mode to the build-time 4096 (overridable via EMULATOR_SNAPSHOT_RAM). Migration replay requires an identical -m value, same constraint as CPU count. 
- Fail amd64 build when .savevm.zst is missing rather than shipping a cold-boot-only release silently. arm64 stays best-effort for now because it runs under TCG and can't be verified end-to-end. - Install Node/pnpm on both arches. arm64 also runs generate-env-development.mjs, which otherwise relied on the runner image's preinstalled Node. --- .github/workflows/qemu-emulator-build.yaml | 13 ++++- docker/local-emulator/qemu/run-emulator.sh | 64 ++++++++++++++++------ 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 3ed56b1472..e2298401d8 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -55,13 +55,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + # Node/pnpm are needed on both arches: arm64 also runs + # generate-env-development.mjs inside build-image.sh. amd64 additionally + # builds and runs the CLI for the verification steps below. - uses: pnpm/action-setup@v4 - if: matrix.arch == 'amd64' with: version: 10.23.0 - uses: actions/setup-node@v4 - if: matrix.arch == 'amd64' with: node-version: 22 cache: pnpm @@ -177,8 +178,14 @@ jobs: if [ -f "$SAVEVM" ]; then cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" + elif [ "${{ matrix.arch }}" = "amd64" ]; then + # amd64 is the fast-resume contract: if the build didn't produce a + # snapshot, fail loudly rather than silently shipping a + # cold-boot-only release. + echo "ERROR: snapshot build expected to produce $SAVEVM for amd64." >&2 + exit 1 else - echo "NOTE: no savevm snapshot was produced; fast-start will be unavailable for this arch." + echo "NOTE: no savevm snapshot was produced for ${{ matrix.arch }}; fast-start will be unavailable for this arch." 
fi - name: Upload image artifact diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 12564f369d..a49b10b428 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -308,7 +308,7 @@ build_qemu_cmd() { # build and are not needed at runtime, but their virtio-blk slots must # exist so the migration replay matches device IDs. Runtime-only devices # (virtfs, balloon) live at higher slots — extra at destination is fine. - local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" + local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM" if snapshot_available; then log "Snapshot found at $savevm_file — fast-resume enabled." # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. @@ -316,9 +316,17 @@ build_qemu_cmd() { # which enables parallel RAM restore (~2-3x faster than streamed decode). snapshot_args+=(-incoming defer) snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" + # RAM size is baked into the snapshot; migration replay requires an + # identical -m value. Pin to the build-time RAM (4096) and ignore + # EMULATOR_RAM — override via EMULATOR_SNAPSHOT_RAM if a different + # snapshot was produced. + snapshot_ram="${EMULATOR_SNAPSHOT_RAM:-4096}" if [ "$snapshot_smp" != "$VM_CPUS" ]; then log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." fi + if [ "$snapshot_ram" != "$VM_RAM" ]; then + log "Pinning RAM to ${snapshot_ram}MB for snapshot resume (ignoring EMULATOR_RAM=${VM_RAM})." + fi # Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present # at snapshot time. 
Their content doesn't matter (cloud-init has already @@ -351,7 +359,7 @@ build_qemu_cmd() { -cpu "$cpu" "${firmware_args[@]}" -boot order=c - -m "$VM_RAM" + -m "$snapshot_ram" -smp "$snapshot_smp" -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" "${runtime_only_args[@]}" @@ -502,14 +510,17 @@ qmp_incoming_and_cont() { return 1 } -# Generate fresh per-install secrets on the host. We pass them to the guest -# through QGA's guest-exec input-data field (base64-encoded), so no host file -# or virtfs mount is needed in the snapshot path. -generate_fresh_secrets_payload() { - printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'CRON_SECRET=%s\n' "$(openssl rand -hex 32)" +# Placeholder PCK baked into the snapshot. Kept in sync with the value in +# docker/local-emulator/qemu/cloud-init/emulator/user-data. +SNAPSHOT_PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff" + +# Write the internal PCK to the host path the CLI reads (see +# readInternalPck() in packages/stack-cli/src/commands/emulator.ts). In +# cold-boot mode the guest publishes this via virtfs/9p, but snapshot mode +# drops virtfs, so the host has to write it itself. +write_internal_pck_for_cli() { + local pck="$1" + (umask 077 && printf '%s' "$pck" > "$VM_DIR/internal-pck") } # Drive qemu-guest-agent via its virtserialport socket. QGA speaks the same @@ -547,8 +558,22 @@ qga_trigger_fast_rotate() { # message is available in serial.log. We pipe the fresh-secrets env file # (as base64) to the script via input-data — keeps secrets off the # filesystem and avoids needing virtfs. 
- local secrets_b64 resp pid - secrets_b64=$(generate_fresh_secrets_payload | base64 | tr -d '\n') + local fresh_pck fresh_ssk fresh_sak fresh_cron payload secrets_b64 resp pid + fresh_pck="$(openssl rand -hex 32)" + fresh_ssk="$(openssl rand -hex 32)" + fresh_sak="$(openssl rand -hex 32)" + fresh_cron="$(openssl rand -hex 32)" + payload=$( + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$fresh_pck" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$fresh_ssk" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$fresh_sak" + printf 'CRON_SECRET=%s\n' "$fresh_cron" + ) + # Publish the fresh PCK to the host path the CLI reads. Writing before the + # guest-exec so a --config-file flow that polls from another process can + # pick it up the moment rotation completes. + write_internal_pck_for_cli "$fresh_pck" + secrets_b64=$(printf '%s' "$payload" | base64 | tr -d '\n') local cmd cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64") resp=$(printf '%s\n' "$cmd" | qga_send || true) @@ -599,8 +624,11 @@ stop_vm() { kill -9 "$pid" 2>/dev/null || true fi fi - rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log" - rm -f "$VM_DIR/runtime-config.iso" + rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log" + # Do NOT remove runtime-config.iso: the CLI owns its lifecycle and run-emulator.sh + # cannot regenerate it. Removing here breaks the snapshot → cold-boot fallback + # (which calls stop_vm before recursing into cmd_start → ensure_runtime_config_iso). + # `cmd_reset` wipes $RUN_DIR entirely when a full reset is wanted. } cmd_start() { @@ -642,6 +670,9 @@ cmd_start() { if [ "$EMULATOR_NO_ROTATION" = "1" ]; then warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." 
+ # The placeholder PCK is live in the running image; publish it to the + # host path so --config-file flows still work. + write_internal_pck_for_cli "$SNAPSHOT_PLACEHOLDER_PCK" if ! wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then warn "Services did not respond after resume — falling back to cold boot." tail_vm_logs @@ -691,9 +722,8 @@ cmd_start() { snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm - # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one, - # but keep the CLI-generated runtime-config.iso (we can't regenerate it - # from shell — the CLI owns that). + # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one. + # runtime-config.iso is preserved by stop_vm (the CLI owns it). rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 From 76f954353673e7e1e46bd73087039dba124027e8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:42:20 -0700 Subject: [PATCH 09/25] simplify emulator fast-start: tighter polls, drop dead wrappers - run-emulator.sh: drop wait_for_condition poll interval from 1s to 0.2s - emulator.ts: replace existsSync+readFileSync TOCTOU in readInternalPck with try/ENOENT; tighten initial backoff to 50ms; drop redundant mkdirSync in startEmulator; surface stop-failure on stderr instead of swallowing silently - iso.ts: inline trivial buildRootDirRecordInVD wrapper --- docker/local-emulator/qemu/run-emulator.sh | 2 +- packages/stack-cli/src/commands/emulator.ts | 16 +++++++++------- packages/stack-cli/src/lib/iso.ts | 14 +++++--------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index a49b10b428..75cbd3a4b4 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -212,7 +212,7 @@ 
wait_for_condition() { log "${label} ready in ${elapsed}s" return 0 fi - sleep 1 + sleep 0.2 elapsed=$((SECONDS - started)) printf "\r [%3ds] %s..." "$elapsed" "$label" done diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 00e8fdae60..3833cffeeb 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -57,11 +57,13 @@ function internalPckPath(): string { async function readInternalPck(timeoutMs = 60_000): Promise { const path = internalPckPath(); const deadline = Date.now() + timeoutMs; - let delay = 250; + let delay = 50; while (Date.now() < deadline) { - if (existsSync(path)) { + try { const contents = readFileSync(path, "utf-8").trim(); if (contents) return contents; + } catch (e) { + if ((e as NodeJS.ErrnoException).code !== "ENOENT") throw e; } await new Promise((r) => setTimeout(r, delay)); delay = Math.min(delay * 2, 2000); @@ -223,7 +225,6 @@ function isEmulatorRunning(): boolean { } async function startEmulator(arch: "arm64" | "amd64"): Promise { - mkdirSync(emulatorImageDir(), { recursive: true }); const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`); if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); @@ -518,9 +519,6 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - // CI publishes both files inside the single qemu-emulator-${arch} - // artifact, so the first download already extracts the snapshot when - // present. Older builds may not include it. 
if (existsSync(snapshotDest)) { console.log(`Downloaded: ${snapshotDest}`); } else { @@ -617,8 +615,12 @@ export function registerEmulatorCommand(program: Command) { process.exit(exitCode); } else { console.log("\nStopping emulator..."); + const warnStopFailed = (e: unknown) => { + const msg = e instanceof Error ? e.message : String(e); + process.stderr.write(`Failed to stop emulator cleanly: ${msg}\n`); + }; runEmulator("stop") - .catch(() => { /* best-effort stop */ }) + .catch(warnStopFailed) .finally(() => process.exit(exitCode)); } }); diff --git a/packages/stack-cli/src/lib/iso.ts b/packages/stack-cli/src/lib/iso.ts index b226af0bc7..6b8ac1bb12 100644 --- a/packages/stack-cli/src/lib/iso.ts +++ b/packages/stack-cli/src/lib/iso.ts @@ -259,13 +259,6 @@ function buildVolumeDescriptorTerminator(): Buffer { return buf; } -// Builds the 34-byte root directory record that lives inside the volume -// descriptor (BP 157-190 of PVD/SVD). Identical layout to a regular directory -// record but identifier is the single byte 0x00. -function buildRootDirRecordInVD(rootSector: number, rootSize: number, recDate: Buffer): Buffer { - return buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00])); -} - export type IsoFile = { name: string, data: Buffer }; export function buildIso(volumeId: string, files: IsoFile[]): Buffer { @@ -317,8 +310,11 @@ export function buildIso(volumeId: string, files: IsoFile[]): Buffer { const totalSectors = nextSector; const pathTableSize = 10; - const isoRootDirRecordVD = buildRootDirRecordInVD(isoRootSector, SECTOR, recDate); - const jolietRootDirRecordVD = buildRootDirRecordInVD(jolietRootSector, SECTOR, recDate); + // Root directory record inside the volume descriptor (BP 157-190 of PVD/SVD): + // same layout as a regular dir record but the identifier is the single byte 0x00. 
+ const rootIdent = Buffer.from([0x00]); + const isoRootDirRecordVD = buildDirRecord(isoRootSector, SECTOR, true, recDate, rootIdent); + const jolietRootDirRecordVD = buildDirRecord(jolietRootSector, SECTOR, true, recDate, rootIdent); const pvd = buildVolumeDescriptor({ joliet: false, From 3586115b0e8b9f99325d3db218f47164b9370d9d Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 16:07:26 -0700 Subject: [PATCH 10/25] fix snapshot resume host fs + restore standalone run-emulator.sh path Snapshot-resume mode dropped virtfs (QEMU disallows migration with 9p mounted), leaving the container's /host bind mount empty so the /local-emulator/project route returned 400. Hot-plug virtio-9p over a pre-attached PCIe root port after resume, mount in guest via QGA, and make /host a shared mount point with rshared docker propagation so the new mount reaches the running container without restart. run-emulator.sh ensure_runtime_config_iso now falls back to in-script ISO generation when invoked outside the CLI (fixes pnpm emulator:start). Also propagate fresh emulator credentials to VITE_/EXPO_PUBLIC_ env var prefixes in the CLI run command. --- docker/local-emulator/qemu/build-image.sh | 5 + .../qemu/cloud-init/emulator/user-data | 41 +++++-- docker/local-emulator/qemu/run-emulator.sh | 107 +++++++++++++++++- packages/stack-cli/src/commands/emulator.ts | 6 + 4 files changed, 148 insertions(+), 11 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 84ab0fa341..295a7972ee 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -477,6 +477,11 @@ build_one() { -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. 
+ # The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug + # only works through a root port. Must be present at snapshot capture + # so the resumed device tree matches. + -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1" ) # QEMU disallows migration when virtfs is mounted in the guest — virtfs # has guest-side state (open handles, mount table) that isn't migratable. diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index 8a968bf079..b3c21527b4 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -166,15 +166,40 @@ write_files: permissions: '0755' content: | #!/bin/bash + # Mount the host filesystem at /host. Two modes: + # (no args) — cold-boot: bind /host on itself, make it a shared + # mount point, then mount virtio-9p on top. The + # bind+shared step is what lets the docker bind + # mount (-v /host:/host:rshared) receive later + # propagation events. + # --post-resume — snapshot-resume: /host is already shared (set up + # at build time and preserved across the snapshot, + # plus the docker bind mount has rshared + # propagation). The host has just hot-plugged + # virtio-9p; mount it on /host and the new mount + # propagates into the running container. set -uo pipefail mkdir -p /host - if mountpoint -q /host; then - exit 0 + + # Idempotent: bind /host on itself once so it becomes a mount point + # with its own propagation, then make it shared. mount --make-shared + # requires a mount point, hence the bind first. + if ! 
mountpoint -q /host; then + mount --bind /host /host + fi + mount --make-shared /host + + if [ "${1:-}" = "--post-resume" ]; then + if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then + exit 0 + fi + echo "post-resume 9p mount failed" >&2 + exit 1 fi - # In snapshot-build mode the host detaches virtfs (QEMU disallows - # migration while it's mounted), and at runtime we re-attach it. Tolerate - # both states: try to mount, fall through to an empty /host if no - # virtio-9p channel is available. + + # Cold boot. In snapshot-build mode the host detaches virtfs (QEMU + # disallows migration while it's mounted), so the 9p mount may not be + # available — tolerate that and fall through to an empty /host. if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then exit 0 fi @@ -220,7 +245,7 @@ write_files: -v stack-clickhouse-data:/data/clickhouse \ -v stack-minio-data:/data/minio \ -v stack-inbucket-data:/data/inbucket \ - -v /host:/host \ + -v /host:/host:rshared \ stack-local-emulator 2>&1 | tee -a "$host_log" else exec docker run \ @@ -234,7 +259,7 @@ write_files: -v stack-clickhouse-data:/data/clickhouse \ -v stack-minio-data:/data/minio \ -v stack-inbucket-data:/data/inbucket \ - -v /host:/host \ + -v /host:/host:rshared \ stack-local-emulator fi diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 75cbd3a4b4..72f095cf57 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -155,11 +155,34 @@ runtime_fingerprint() { ensure_runtime_config_iso() { local cfg_iso cfg_iso="$(runtime_iso_path)" - if [ ! -s "$cfg_iso" ]; then - err "Runtime config ISO missing at $cfg_iso." - err "The CLI normally generates this; if you're invoking run-emulator.sh directly, run via 'stack emulator start' instead." + if [ -s "$cfg_iso" ]; then + return 0 + fi + + # Fallback used when this script is invoked directly (e.g. 
`pnpm + # emulator:start`) rather than through the stack-cli, which generates the + # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume + # label so the guest's render-stack-env mounts it the same way. + local base_env="$SCRIPT_DIR/../.env.development" + if [ ! -f "$base_env" ]; then + err "Cannot generate runtime config ISO: $base_env is missing." + err "Run 'pnpm run emulator:generate-env' first, or invoke via 'stack emulator start'." exit 1 fi + + local cfg_dir="$VM_DIR/runtime-config" + rm -rf "$cfg_dir" + mkdir -p "$cfg_dir" + { + printf "STACK_EMULATOR_PORT_PREFIX=%s\n" "$PORT_PREFIX" + printf "STACK_EMULATOR_DASHBOARD_HOST_PORT=%s\n" "$EMULATOR_DASHBOARD_PORT" + printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" + printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" + printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" + printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" + } > "$cfg_dir/runtime.env" + cp "$base_env" "$cfg_dir/base.env" + make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" } service_is_up() { @@ -371,6 +394,18 @@ build_qemu_cmd() { -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. + # MUST be the last explicit -device entry — slot order has to mirror + # build-image.sh exactly or migration replay stalls in inmigrate. + -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1" + # Pre-create the host-side fsdev backend so the post-resume QMP + # device_add can attach to it by id. -fsdev is host-only state — not + # part of the migrated device tree — so it's safe to add here even + # though the snapshot was captured without it. 
Going through -fsdev + # avoids the HMP fsdev_add command, whose error path is invisible + # via human-monitor-command (errors come back as a return string, + # not a QMP error). + -fsdev "local,id=hostfs,path=/,security_model=none" "${snapshot_args[@]}" -serial "file:$VM_DIR/serial.log" -display none @@ -552,6 +587,56 @@ qga_wait_ready() { return 1 } +# Hot-plug a virtio-9p device backed by host `/` after a snapshot resume. +# The snapshot was captured WITHOUT virtfs (QEMU disallows migration while +# 9p is mounted in the guest), so the resumed VM has no host filesystem +# available until we add one here. The fsdev backend was pre-created by +# the -fsdev option in build_qemu_cmd; we only need the device_add half. +qmp_hotplug_9p() { + local resp + resp=$(printf '%s\n' \ + '{"execute":"device_add","arguments":{"driver":"virtio-9p-pci","id":"hostfs-dev","fsdev":"hostfs","mount_tag":"hostfs","bus":"hostfs-port"}}' \ + | qmp_send) + if printf '%s' "$resp" | grep -q '"error"'; then + err "QMP device_add virtio-9p-pci failed: $resp" + return 1 + fi + return 0 +} + +# Run /usr/local/bin/mount-host-fs --post-resume in the guest. The script +# mounts the freshly-hot-plugged 9p device on /host, which is a shared +# mount point — so the new mount propagates into the running stack +# container's `-v /host:/host:rshared` bind mount without a container +# restart. 
+qga_mount_host_fs() { + local cmd resp pid status_resp exited exitcode + cmd='{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/mount-host-fs","arg":["--post-resume"],"capture-output":true}}' + resp=$(printf '%s\n' "$cmd" | qga_send || true) + pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + if [ -z "$pid" ]; then + err "guest-exec mount-host-fs did not return a pid; response: $resp" + return 1 + fi + local deadline=$((SECONDS + 20)) + while [ "$SECONDS" -lt "$deadline" ]; do + status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true) + exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/') + if [ "$exited" = "true" ]; then + exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/') + if [ "${exitcode:-0}" = "0" ]; then + log "host fs mounted in guest" + return 0 + fi + err "mount-host-fs exited with code ${exitcode:-unknown}; response: $status_resp" + return 1 + fi + sleep 0.2 + done + err "mount-host-fs did not complete within 20s" + return 1 +} + qga_trigger_fast_rotate() { # guest-exec returns a pid; we then poll guest-exec-status until the # process exits, and surface its exit code. Capture output so a failure @@ -668,6 +753,22 @@ cmd_start() { return fi + # Hot-plug the host filesystem. The snapshot was captured without + # virtfs, so the running container has an empty /host bind mount until + # we add the 9p device and mount it in the guest. Required for routes + # like /local-emulator/project that read user-supplied paths via /host. + log "Hot-plugging host filesystem..." + if ! qmp_hotplug_9p; then + warn "Failed to hot-plug 9p device — falling back to cold boot." 
+ snapshot_fallback_to_cold_boot + return + fi + if ! qga_mount_host_fs; then + warn "Failed to mount host fs in guest — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + if [ "$EMULATOR_NO_ROTATION" = "1" ]; then warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." # The placeholder PCK is live in the running image; publish it to the diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 3833cffeeb..12b3080892 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -592,11 +592,17 @@ export function registerEmulatorCommand(program: Command) { const apiUrl = `http://127.0.0.1:${backendPort}`; childEnv.STACK_PROJECT_ID = creds.project_id; childEnv.NEXT_PUBLIC_STACK_PROJECT_ID = creds.project_id; + childEnv.VITE_STACK_PROJECT_ID = creds.project_id; + childEnv.EXPO_PUBLIC_STACK_PROJECT_ID = creds.project_id; childEnv.STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; childEnv.NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; + childEnv.VITE_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; + childEnv.EXPO_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; childEnv.STACK_SECRET_SERVER_KEY = creds.secret_server_key; childEnv.STACK_API_URL = apiUrl; childEnv.NEXT_PUBLIC_STACK_API_URL = apiUrl; + childEnv.VITE_STACK_API_URL = apiUrl; + childEnv.EXPO_PUBLIC_STACK_API_URL = apiUrl; } const child = spawn(cmd, { shell: true, stdio: "inherit", env: childEnv }); From 037755ba161dd0d0c4b2bf1f295474da7bdc0d2b Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 16:53:07 -0700 Subject: [PATCH 11/25] retry tsdown migration build to survive qemu-user futex hangs Cross-arch arm64-on-amd64 docker buildx runs the rolldown-backed tsdown build under qemu-user, whose futex emulation occasionally deadlocks the worker threads. 
Wrap the call in a bounded timeout + 3-attempt retry so a hang fails the layer in <11min and recovers on the next try. --- docker/local-emulator/Dockerfile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docker/local-emulator/Dockerfile b/docker/local-emulator/Dockerfile index 138270b405..603999b649 100644 --- a/docker/local-emulator/Dockerfile +++ b/docker/local-emulator/Dockerfile @@ -57,8 +57,22 @@ ENV NEXT_PUBLIC_STACK_STRIPE_PUBLISHABLE_KEY=pk_test_mock_publishable_key_for_lo # Build the backend NextJS app RUN pnpm turbo run docker-build --filter=@stackframe/backend... --filter=@stackframe/dashboard... -# Build the self-host seed script -RUN cd apps/backend && pnpm build-self-host-migration-script +# Build the self-host seed script. +# tsdown -> rolldown is multi-threaded Rust; under qemu-user (cross-arch +# arm64-on-amd64) its futex emulation occasionally deadlocks and the build +# hangs forever. Bound each attempt and retry to ride out the race. +RUN cd apps/backend && \ + attempt=1; \ + while :; do \ + timeout --kill-after=30s 600s pnpm build-self-host-migration-script && break; \ + rc=$?; \ + if [ "$attempt" -ge 3 ]; then \ + echo "build-self-host-migration-script failed after $attempt attempts (last rc=$rc)" >&2; \ + exit "$rc"; \ + fi; \ + echo "build-self-host-migration-script attempt $attempt failed (rc=$rc); retrying..." 
>&2; \ + attempt=$((attempt + 1)); \ + done # Prune node_modules for runtime: remove dev tools, heavy UI packages, From 894c1ce77cdce41450dffe788a0aaaf4db1fc3b8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:27:33 -0700 Subject: [PATCH 12/25] fix CLI artifact download + build arm64 emulator on macOS runner - Fix 415 on artifact download: use application/vnd.github+json Accept header - Fix EACCES on run-emulator.sh: chmod +x at runtime (npm strips execute bit) - Move arm64 emulator build to a macOS-15 runner with HVF so the snapshot is portable to developer Macs (KVM snapshots from Linux are not resumable under HVF due to differing -cpu max feature sets) --- .../workflows/qemu-emulator-build-arm64.yaml | 134 ++++++++++++++++++ .github/workflows/qemu-emulator-build.yaml | 11 +- packages/stack-cli/src/commands/emulator.ts | 14 +- 3 files changed, 147 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/qemu-emulator-build-arm64.yaml diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml new file mode 100644 index 0000000000..c58e06d40f --- /dev/null +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -0,0 +1,134 @@ +name: Build QEMU Emulator Image (arm64 / macOS) + +# arm64 emulator images are built on a macOS Apple Silicon runner so the +# snapshot is captured under HVF — the same accelerator developer Macs use. +# KVM snapshots (from Linux runners) are NOT resumable under HVF because +# `-cpu max` expands to different feature sets under each accelerator. 
+ +on: + push: + branches: + - main + - dev + pull_request: + paths: + - 'docker/local-emulator/**' + - '.github/workflows/qemu-emulator-build-arm64.yaml' + workflow_dispatch: + +concurrency: + group: qemu-arm64-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} + +env: + EMULATOR_IMAGE_NAME: stack-local-emulator + EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images + EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run + +jobs: + build: + name: Build QEMU Image (arm64) + runs-on: macos-15 + timeout-minutes: 120 + + steps: + - uses: actions/checkout@v6 + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install system dependencies + run: brew install qemu socat zstd + + - name: Set up Docker via colima + run: | + brew install docker docker-buildx colima + mkdir -p ~/.docker/cli-plugins + ln -sfn "$(brew --prefix docker-buildx)/bin/docker-buildx" ~/.docker/cli-plugins/docker-buildx + colima start --cpu 4 --memory 6 --disk 60 --arch aarch64 + docker info + docker buildx version + + - name: Verify QEMU + HVF + run: | + qemu-system-aarch64 --version + if qemu-system-aarch64 -accel help 2>&1 | grep -q hvf; then + echo "HVF available — snapshot will be portable to developer Macs" + else + echo "::error::HVF not available on this runner" + exit 1 + fi + + - name: Build QEMU image + run: | + chmod +x docker/local-emulator/qemu/build-image.sh + EMULATOR_PROVISION_TIMEOUT=6000 \ + docker/local-emulator/qemu/build-image.sh arm64 + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + # HVF gives us native-speed arm64 — we can verify the image boots + # and services come up, unlike the old cross-arch TCG path. + - name: Build stack-cli + run: | + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' 
+ pnpm exec turbo run build --filter='@stackframe/stack-cli...' + + - name: Start emulator and verify + env: + EMULATOR_ARCH: arm64 + EMULATOR_READY_TIMEOUT: 3200 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator start + + - name: Verify services are healthy + env: + EMULATOR_ARCH: arm64 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator status + + - name: Stop emulator + if: always() + env: + EMULATOR_ARCH: arm64 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator stop + + - name: Print serial log on failure + if: failure() + run: | + tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true + + - name: Package image + run: | + BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-arm64.qcow2" + SAVEVM="docker/local-emulator/qemu/images/stack-emulator-arm64.savevm.zst" + cp "$BASE_IMG" "stack-emulator-arm64.qcow2" + if [ -f "$SAVEVM" ]; then + cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" + ls -lh "stack-emulator-arm64.savevm.zst" + else + echo "::error::Snapshot was not produced — fast-start will be unavailable" + exit 1 + fi + + - name: Upload image artifact + uses: actions/upload-artifact@v4 + with: + name: qemu-emulator-arm64 + path: | + stack-emulator-arm64.qcow2 + stack-emulator-arm64.savevm.zst + if-no-files-found: error + retention-days: 30 + compression-level: 0 diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index e2298401d8..81cb509e4b 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -35,16 +35,11 @@ jobs: matrix: include: # amd64 runs natively under KVM on ubicloud's amd64 runner. 
+ # arm64 is built in a separate workflow on a macOS runner (HVF) + # so that the snapshot is portable to developer Macs. + # See qemu-emulator-build-arm64.yaml. - arch: amd64 runner: ubicloud-standard-8 - # arm64 runs under cross-arch TCG on ubicloud's amd64 runner. - # No KVM for arm64 guests on an amd64 host; cortex-a72 + V8 - # --jitless together sidestep the SIGTRAPs that cross-arch TCG - # hits on aggressive arm64 JIT code. Smoke test is still skipped - # because the backend can't come up reliably under cross-arch - # TCG within any sane window. - - arch: arm64 - runner: ubicloud-standard-8 steps: - uses: actions/checkout@v6 diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 12b3080892..9088bbc3f0 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,7 +1,7 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; import extract from "extract-zip"; -import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; +import { chmodSync, createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; import { Readable } from "stream"; @@ -143,12 +143,18 @@ async function ghApi(path: string): Promise { function emulatorScriptsDir(): string { const here = dirname(fileURLToPath(import.meta.url)); const bundled = join(here, "emulator"); - if (existsSync(join(bundled, "run-emulator.sh"))) return bundled; + if (existsSync(join(bundled, "run-emulator.sh"))) return ensureExecutable(bundled); const repo = resolve(here, "../../../docker/local-emulator/qemu"); - if (existsSync(join(repo, "run-emulator.sh"))) return repo; + if (existsSync(join(repo, "run-emulator.sh"))) return ensureExecutable(repo); throw new CliError("Emulator scripts not found in CLI bundle."); } +// npm pack strips 
the execute bit from non-`bin` files, so restore it here. +function ensureExecutable(scriptsDir: string): string { + try { chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); } catch { /* best-effort */ } + return scriptsDir; +} + function baseEnvPath(): string { // Lives one directory up from the scripts dir in both bundled and repo // layouts (dist/.env.development vs docker/local-emulator/.env.development). @@ -467,7 +473,7 @@ async function downloadArtifactByName(repo: string, runId: string, name: string, console.log(`Downloading artifact '${name}' from run ${runId}...`); await downloadWithProgress( `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`, - { Accept: "application/octet-stream", Authorization: `Bearer ${token}` }, + { Accept: "application/vnd.github+json", Authorization: `Bearer ${token}` }, zipPath, match.size_in_bytes, ); From 54ecda8701a3d27a8770208ec8bd851c324639aa Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:31:57 -0700 Subject: [PATCH 13/25] fix colima on GHA macOS: use QEMU backend instead of VZ driver --- .github/workflows/qemu-emulator-build-arm64.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml index c58e06d40f..5018d22c1d 100644 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -49,9 +49,11 @@ jobs: - name: Set up Docker via colima run: | brew install docker docker-buildx colima - mkdir -p ~/.docker/cli-plugins - ln -sfn "$(brew --prefix docker-buildx)/bin/docker-buildx" ~/.docker/cli-plugins/docker-buildx - colima start --cpu 4 --memory 6 --disk 60 --arch aarch64 + # Wire up buildx as a CLI plugin + mkdir -p ~/.docker + echo '{"cliPluginsExtraDirs":["/opt/homebrew/lib/docker/cli-plugins"]}' > ~/.docker/config.json + # VZ driver doesn't work on GHA macOS runners — use QEMU backend + colima start --vm-type=qemu 
--cpu 4 --memory 6 --disk 60 --arch aarch64 docker info docker buildx version From 49a20ed019669fd4deef4fa350a95c57f660f271 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:36:50 -0700 Subject: [PATCH 14/25] split arm64 build: Docker on Linux, QEMU snapshot on macOS Docker is difficult to run on macOS CI runners (colima VZ and QEMU backends both crash). Split into two stages: 1. docker-build (Linux): builds arm64 Docker image, exports tarball 2. qemu-snapshot (macOS): provisions QEMU VM under HVF, captures snapshot Add SKIP_DOCKER_BUILD=1 to build-image.sh to reuse a pre-built bundle. --- .../workflows/qemu-emulator-build-arm64.yaml | 117 ++++++++++++------ docker/local-emulator/qemu/build-image.sh | 8 +- 2 files changed, 88 insertions(+), 37 deletions(-) diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml index 5018d22c1d..d50ed633f7 100644 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -1,9 +1,11 @@ name: Build QEMU Emulator Image (arm64 / macOS) -# arm64 emulator images are built on a macOS Apple Silicon runner so the -# snapshot is captured under HVF — the same accelerator developer Macs use. -# KVM snapshots (from Linux runners) are NOT resumable under HVF because -# `-cpu max` expands to different feature sets under each accelerator. +# arm64 emulator images are built in two stages: +# 1. docker-build (Linux): builds the Docker container image for arm64 and +# exports a tarball — Docker is painful to run on macOS CI runners. +# 2. qemu-snapshot (macOS): boots the image under HVF on Apple Silicon, +# provisions it, and captures a snapshot. HVF snapshots are portable to +# developer Macs; KVM snapshots are NOT (differing -cpu max features). 
on: push: @@ -22,14 +24,68 @@ concurrency: env: EMULATOR_IMAGE_NAME: stack-local-emulator - EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images - EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run jobs: - build: - name: Build QEMU Image (arm64) + # ---------- Stage 1: build Docker image on Linux ---------- + docker-build: + name: Build Docker Image (arm64) + runs-on: ubicloud-standard-8 + timeout-minutes: 60 + + steps: + - uses: actions/checkout@v6 + + - name: Set up QEMU user-mode emulation + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + - name: Build arm64 Docker image + run: | + docker buildx build \ + --platform linux/arm64 \ + --tag "$EMULATOR_IMAGE_NAME" \ + --load \ + -f docker/local-emulator/Dockerfile \ + . 
+ + - name: Export Docker image bundle + run: | + mkdir -p /tmp/bundle + docker save "$EMULATOR_IMAGE_NAME" | gzip -c > /tmp/bundle/emulator-arm64-docker-images.tar.gz + docker image inspect --format '{{.ID}}' "$EMULATOR_IMAGE_NAME" > /tmp/bundle/emulator-arm64-docker-images.tar.gz.image-ids + ls -lh /tmp/bundle/ + + - name: Upload Docker bundle + uses: actions/upload-artifact@v4 + with: + name: arm64-docker-bundle + path: /tmp/bundle/ + retention-days: 1 + compression-level: 0 + + # ---------- Stage 2: QEMU provision + snapshot on macOS (HVF) ---------- + qemu-snapshot: + name: QEMU Snapshot (arm64 / HVF) + needs: docker-build runs-on: macos-15 timeout-minutes: 120 + env: + EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images + EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run steps: - uses: actions/checkout@v6 @@ -46,17 +102,6 @@ jobs: - name: Install system dependencies run: brew install qemu socat zstd - - name: Set up Docker via colima - run: | - brew install docker docker-buildx colima - # Wire up buildx as a CLI plugin - mkdir -p ~/.docker - echo '{"cliPluginsExtraDirs":["/opt/homebrew/lib/docker/cli-plugins"]}' > ~/.docker/config.json - # VZ driver doesn't work on GHA macOS runners — use QEMU backend - colima start --vm-type=qemu --cpu 4 --memory 6 --disk 60 --arch aarch64 - docker info - docker buildx version - - name: Verify QEMU + HVF run: | qemu-system-aarch64 --version @@ -67,17 +112,26 @@ jobs: exit 1 fi - - name: Build QEMU image + - name: Download Docker bundle + uses: actions/download-artifact@v4 + with: + name: arm64-docker-bundle + path: ${{ env.EMULATOR_IMAGE_DIR }}/ + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + - name: Build QEMU image (provision + snapshot) run: | chmod +x docker/local-emulator/qemu/build-image.sh + # SKIP_DOCKER_BUILD=1 tells build-image.sh to skip the Docker + # build + export steps — we already have the bundle from stage 
1. EMULATOR_PROVISION_TIMEOUT=6000 \ + SKIP_DOCKER_BUILD=1 \ docker/local-emulator/qemu/build-image.sh arm64 - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - # HVF gives us native-speed arm64 — we can verify the image boots - # and services come up, unlike the old cross-arch TCG path. + # HVF gives us native-speed arm64 — verify the image boots and + # services come up (previously impossible under cross-arch TCG). - name: Build stack-cli run: | pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' @@ -87,34 +141,27 @@ jobs: env: EMULATOR_ARCH: arm64 EMULATOR_READY_TIMEOUT: 3200 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy env: EMULATOR_ARCH: arm64 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator status - name: Stop emulator if: always() env: EMULATOR_ARCH: arm64 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator stop - name: Print serial log on failure if: failure() - run: | - tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true + run: tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true - name: Package image run: | - BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-arm64.qcow2" - SAVEVM="docker/local-emulator/qemu/images/stack-emulator-arm64.savevm.zst" + BASE_IMG="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.qcow2" + SAVEVM="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.savevm.zst" cp "$BASE_IMG" "stack-emulator-arm64.qcow2" if [ -f "$SAVEVM" ]; then cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 295a7972ee..55d0cb2938 100755 --- 
a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -657,8 +657,12 @@ BUILD_ENV_FILE="$REPO_ROOT/docker/local-emulator/.env.development" for arch in "${TARGET_ARCHS[@]}"; do local_base="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2" download_cloud_image "$arch" "$local_base" - build_local_emulator_image "$arch" - prepare_bundle_artifacts "$arch" + if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then + log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" + else + build_local_emulator_image "$arch" + prepare_bundle_artifacts "$arch" + fi build_one "$arch" done From 11531ebc4b2ee0a372d67446d2f88d7612c56f10 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:41:13 -0700 Subject: [PATCH 15/25] fix check_deps: skip docker requirement when SKIP_DOCKER_BUILD=1 Also validate that the pre-built bundle exists before proceeding. --- docker/local-emulator/qemu/build-image.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 55d0cb2938..1b082d82c7 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -60,9 +60,12 @@ check_deps() { command -v "$qemu_bin" >/dev/null 2>&1 || missing+=("$qemu_bin") done - for cmd in qemu-img curl docker gzip; do + for cmd in qemu-img curl gzip; do command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done + if [ "${SKIP_DOCKER_BUILD:-0}" != "1" ]; then + command -v docker >/dev/null 2>&1 || missing+=("docker") + fi if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then for cmd in socat zstd; do @@ -659,6 +662,11 @@ for arch in "${TARGET_ARCHS[@]}"; do download_cloud_image "$arch" "$local_base" if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" + local expected_bundle="$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" + if [ ! 
-f "$expected_bundle" ]; then + err "Pre-built bundle not found: $expected_bundle" + exit 1 + fi else build_local_emulator_image "$arch" prepare_bundle_artifacts "$arch" From 753463702198c306eccaea6d7ca2c58582f7060d Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 19:12:52 -0700 Subject: [PATCH 16/25] fix lint warning + remove invalid `local` in top-level loop - Split single-line try/catch to satisfy max-statements-per-line - Remove `local` keyword from top-level for-loop (only valid in functions) --- docker/local-emulator/qemu/build-image.sh | 5 ++--- packages/stack-cli/src/commands/emulator.ts | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 1b082d82c7..5a8c4071ba 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -662,9 +662,8 @@ for arch in "${TARGET_ARCHS[@]}"; do download_cloud_image "$arch" "$local_base" if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" - local expected_bundle="$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" - if [ ! -f "$expected_bundle" ]; then - err "Pre-built bundle not found: $expected_bundle" + if [ ! -f "$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" ]; then + err "Pre-built bundle not found: $IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" exit 1 fi else diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 9088bbc3f0..942b436cd7 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -151,7 +151,11 @@ function emulatorScriptsDir(): string { // npm pack strips the execute bit from non-`bin` files, so restore it here. 
function ensureExecutable(scriptsDir: string): string { - try { chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); } catch { /* best-effort */ } + try { + chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); + } catch { + // best-effort + } return scriptsDir; } From 288b80ee0d2f3253bb0dfc2783a89a290e609d1c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 19:33:31 -0700 Subject: [PATCH 17/25] fix empty array expansion under bash 3.2 (macOS) macOS ships bash 3.2 which treats empty arrays as unbound under set -u. Use ${arr[@]+"${arr[@]}"} idiom for virtfs_args and snapshot_args. --- docker/local-emulator/qemu/build-image.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 5a8c4071ba..19476d658c 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -503,8 +503,8 @@ build_one() { -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ - "${virtfs_args[@]}" \ - "${snapshot_args[@]}" \ + ${virtfs_args[@]+"${virtfs_args[@]}"} \ + ${snapshot_args[@]+"${snapshot_args[@]}"} \ -serial "file:$serial_log" \ -display none \ -daemonize \ From d94aa661d83f4ad6f34d4e0b7e6ba484ba19c066 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 11:12:42 -0700 Subject: [PATCH 18/25] capture emulator snapshot locally during pull instead of shipping from CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU migration state isn't portable across accelerators — a KVM-captured snapshot won't resume under HVF, and `-cpu max` feature sets differ across hosts. 
Instead of trying to match every (KVM/HVF/TCG, amd64/arm64) combination in CI, capture the snapshot on the user's own machine during `stack emulator pull`: download the qcow2, cold-boot once, wait for all services, QMP migrate via mapped-ram + multifd, compress. Subsequent `stack emulator start`s fast-resume in ~3-8s as before. - Factor qmp_session + capture_vm_state out of build-image.sh into common.sh so run-emulator.sh can call them. - Add cmd_capture to run-emulator.sh. build_qemu_cmd emits the resume-compatible device layout (phantom ISOs, no virtfs, fsdev + pcie-root-port, pinned 4096MB/4CPU) with -incoming defer gated on an actual snapshot being present, so capture mode reuses the same path. - Capture regenerates runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty — virtfs is detached for migration compat so /host isn't mounted; the `install internal-pck → /host/$VM_DIR_HOST` path would otherwise fail and restart-loop stack.service. Mirrors build-image.sh's CI runtime.env shape. - stack-cli `pull` downloads only the qcow2 then invokes run-emulator.sh capture. Add --skip-snapshot for CI/debug. startEmulator auto-captures on the auto-pull fallback. - Revert the arm64 CI split: delete qemu-emulator-build-arm64.yaml, restore arm64 to the unified matrix on ubicloud-standard-8 under cross-arch TCG (macOS HVF runner existed only to produce a portable snapshot; no longer needed). Drop savevm.zst from package/upload/publish steps; update release notes. Verified end-to-end on an arm64 Mac under HVF: capture 50s, fast-resume 6.5s, all services green. 
--- .../workflows/qemu-emulator-build-arm64.yaml | 183 ------------------ .github/workflows/qemu-emulator-build.yaml | 58 ++---- docker/local-emulator/qemu/build-image.sh | 138 +------------ docker/local-emulator/qemu/common.sh | 139 +++++++++++++ docker/local-emulator/qemu/run-emulator.sh | 155 +++++++++++++-- packages/stack-cli/src/commands/emulator.ts | 54 ++++-- 6 files changed, 337 insertions(+), 390 deletions(-) delete mode 100644 .github/workflows/qemu-emulator-build-arm64.yaml diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml deleted file mode 100644 index d50ed633f7..0000000000 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ /dev/null @@ -1,183 +0,0 @@ -name: Build QEMU Emulator Image (arm64 / macOS) - -# arm64 emulator images are built in two stages: -# 1. docker-build (Linux): builds the Docker container image for arm64 and -# exports a tarball — Docker is painful to run on macOS CI runners. -# 2. qemu-snapshot (macOS): boots the image under HVF on Apple Silicon, -# provisions it, and captures a snapshot. HVF snapshots are portable to -# developer Macs; KVM snapshots are NOT (differing -cpu max features). 
- -on: - push: - branches: - - main - - dev - pull_request: - paths: - - 'docker/local-emulator/**' - - '.github/workflows/qemu-emulator-build-arm64.yaml' - workflow_dispatch: - -concurrency: - group: qemu-arm64-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} - -env: - EMULATOR_IMAGE_NAME: stack-local-emulator - -jobs: - # ---------- Stage 1: build Docker image on Linux ---------- - docker-build: - name: Build Docker Image (arm64) - runs-on: ubicloud-standard-8 - timeout-minutes: 60 - - steps: - - uses: actions/checkout@v6 - - - name: Set up QEMU user-mode emulation - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - uses: pnpm/action-setup@v4 - with: - version: 10.23.0 - - - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: pnpm - - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - - name: Build arm64 Docker image - run: | - docker buildx build \ - --platform linux/arm64 \ - --tag "$EMULATOR_IMAGE_NAME" \ - --load \ - -f docker/local-emulator/Dockerfile \ - . 
- - - name: Export Docker image bundle - run: | - mkdir -p /tmp/bundle - docker save "$EMULATOR_IMAGE_NAME" | gzip -c > /tmp/bundle/emulator-arm64-docker-images.tar.gz - docker image inspect --format '{{.ID}}' "$EMULATOR_IMAGE_NAME" > /tmp/bundle/emulator-arm64-docker-images.tar.gz.image-ids - ls -lh /tmp/bundle/ - - - name: Upload Docker bundle - uses: actions/upload-artifact@v4 - with: - name: arm64-docker-bundle - path: /tmp/bundle/ - retention-days: 1 - compression-level: 0 - - # ---------- Stage 2: QEMU provision + snapshot on macOS (HVF) ---------- - qemu-snapshot: - name: QEMU Snapshot (arm64 / HVF) - needs: docker-build - runs-on: macos-15 - timeout-minutes: 120 - env: - EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images - EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run - - steps: - - uses: actions/checkout@v6 - - - uses: pnpm/action-setup@v4 - with: - version: 10.23.0 - - - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: pnpm - - - name: Install system dependencies - run: brew install qemu socat zstd - - - name: Verify QEMU + HVF - run: | - qemu-system-aarch64 --version - if qemu-system-aarch64 -accel help 2>&1 | grep -q hvf; then - echo "HVF available — snapshot will be portable to developer Macs" - else - echo "::error::HVF not available on this runner" - exit 1 - fi - - - name: Download Docker bundle - uses: actions/download-artifact@v4 - with: - name: arm64-docker-bundle - path: ${{ env.EMULATOR_IMAGE_DIR }}/ - - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - - name: Build QEMU image (provision + snapshot) - run: | - chmod +x docker/local-emulator/qemu/build-image.sh - # SKIP_DOCKER_BUILD=1 tells build-image.sh to skip the Docker - # build + export steps — we already have the bundle from stage 1. 
- EMULATOR_PROVISION_TIMEOUT=6000 \ - SKIP_DOCKER_BUILD=1 \ - docker/local-emulator/qemu/build-image.sh arm64 - - # HVF gives us native-speed arm64 — verify the image boots and - # services come up (previously impossible under cross-arch TCG). - - name: Build stack-cli - run: | - pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' - pnpm exec turbo run build --filter='@stackframe/stack-cli...' - - - name: Start emulator and verify - env: - EMULATOR_ARCH: arm64 - EMULATOR_READY_TIMEOUT: 3200 - run: node packages/stack-cli/dist/index.js emulator start - - - name: Verify services are healthy - env: - EMULATOR_ARCH: arm64 - run: node packages/stack-cli/dist/index.js emulator status - - - name: Stop emulator - if: always() - env: - EMULATOR_ARCH: arm64 - run: node packages/stack-cli/dist/index.js emulator stop - - - name: Print serial log on failure - if: failure() - run: tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true - - - name: Package image - run: | - BASE_IMG="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.qcow2" - SAVEVM="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.savevm.zst" - cp "$BASE_IMG" "stack-emulator-arm64.qcow2" - if [ -f "$SAVEVM" ]; then - cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" - ls -lh "stack-emulator-arm64.savevm.zst" - else - echo "::error::Snapshot was not produced — fast-start will be unavailable" - exit 1 - fi - - - name: Upload image artifact - uses: actions/upload-artifact@v4 - with: - name: qemu-emulator-arm64 - path: | - stack-emulator-arm64.qcow2 - stack-emulator-arm64.savevm.zst - if-no-files-found: error - retention-days: 30 - compression-level: 0 diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 81cb509e4b..380a8ab1fc 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -34,12 +34,18 @@ jobs: fail-fast: false matrix: include: - # amd64 runs natively under KVM on ubicloud's amd64 runner. 
- # arm64 is built in a separate workflow on a macOS runner (HVF) - # so that the snapshot is portable to developer Macs. - # See qemu-emulator-build-arm64.yaml. + # Both arches build on ubicloud's amd64 runner. amd64 uses KVM; + # arm64 runs under cross-arch TCG (slow, but only cloud-init + # provisioning has to complete — the boot/verify smoke test below + # is gated to amd64 because TCG can't boot Next.js in any + # reasonable time). Snapshots are NOT published — `stack emulator + # pull` captures one locally on first run, which is the only way + # to guarantee KVM/HVF/TCG + `-cpu max` compatibility on the + # user's machine. - arch: amd64 runner: ubicloud-standard-8 + - arch: arm64 + runner: ubicloud-standard-8 steps: - uses: actions/checkout@v6 @@ -168,29 +174,15 @@ jobs: - name: Package image run: | BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2" - SAVEVM="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.savevm.zst" cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2" - if [ -f "$SAVEVM" ]; then - cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" - ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" - elif [ "${{ matrix.arch }}" = "amd64" ]; then - # amd64 is the fast-resume contract: if the build didn't produce a - # snapshot, fail loudly rather than silently shipping a - # cold-boot-only release. - echo "ERROR: snapshot build expected to produce $SAVEVM for amd64." >&2 - exit 1 - else - echo "NOTE: no savevm snapshot was produced for ${{ matrix.arch }}; fast-start will be unavailable for this arch." 
- fi + ls -lh "stack-emulator-${{ matrix.arch }}.qcow2" - name: Upload image artifact uses: actions/upload-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: | - stack-emulator-${{ matrix.arch }}.qcow2 - stack-emulator-${{ matrix.arch }}.savevm.zst - if-no-files-found: warn + path: stack-emulator-${{ matrix.arch }}.qcow2 + if-no-files-found: error retention-days: 30 compression-level: 0 @@ -266,18 +258,14 @@ jobs: name: qemu-emulator-${{ matrix.arch }} path: ${{ github.workspace }}/.stack-emulator-images/ - - name: Place images into STACK_EMULATOR_HOME layout + - name: Place qcow2 into STACK_EMULATOR_HOME layout run: | mkdir -p "$HOME/.stack/emulator/images" cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" - if [ -f "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" ]; then - cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" "$HOME/.stack/emulator/images/" - echo "Snapshot present — will test snapshot-resume path." - else - echo "No snapshot — will test cold-boot path." - fi ls -lh "$HOME/.stack/emulator/images/" + # No savevm.zst artifact (users capture locally via `emulator pull`), + # so `emulator start` cold-boots the qcow2. Budget accordingly. - name: Start emulator via CLI run: | EMULATOR_ARCH=${{ matrix.arch }} \ @@ -336,11 +324,6 @@ jobs: for f in artifacts/qemu-emulator-*/*.qcow2; do cp "$f" release/ done - # savevm.zst is optional — older branches may not produce it. Skip - # missing files rather than failing the publish. - for f in artifacts/qemu-emulator-*/*.savevm.zst; do - [ -f "$f" ] && cp "$f" release/ - done cat > release-notes.md </dev/null || true } -# Open a persistent QMP session on the monitor socket, negotiate capabilities, -# run a series of commands, and close. Commands are read from stdin (one JSON -# object per line); responses are written to stdout. 
Uses socat's bidirectional -# pipe so we can interleave request/response in one connection — QMP requires -# qmp_capabilities to come first and keeps state across commands. -# Keeps stdin open briefly after caller's input ends so QEMU has time to -# process the last command before socat closes. -qmp_session() { - local sock="$1" - local payload - payload="$(cat)" - ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t30 - "UNIX-CONNECT:${sock}" -} - -# Drive the snapshot capture over QMP: -# 1. qmp_capabilities — exit negotiation mode. -# 2. stop — pause the VM so no more disk writes happen. -# 3. migrate to exec:zstd > — streams RAM/device state out. -# 4. Poll query-migrate until status=completed (or failed). -# 5. quit — terminate QEMU cleanly. -capture_vm_state() { - local sock="$1" - local guest_path="$2" - - if [ ! -S "$sock" ]; then - err "QMP monitor socket missing: $sock" - return 1 - fi - - log " QMP: stopping VM..." - { - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"stop"}' - } | qmp_session "$sock" >/dev/null || { - err "QMP stop failed" - return 1 - } - - log " QMP: enabling mapped-ram + multifd for fast resume..." - # mapped-ram: writes each RAM page to a fixed offset in the output file - # (vs the legacy streamed format). This lets the target QEMU mmap the file - # and fault pages lazily — and combined with multifd, load RAM in parallel. - # multifd-channels=4 matches our pinned SMP so the channels don't starve - # each other on the target's 4 vCPUs. 
- local caps_cmd params_cmd - caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' - params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' - local setup_resp - setup_resp=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' "$caps_cmd" - printf '%s\n' "$params_cmd" - } | qmp_session "$sock") || { - err "QMP capabilities setup failed" - return 1 - } - if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then - err "QMP capabilities returned error: $setup_resp" - return 1 - fi - - log " QMP: migrating RAM state to ${guest_path}..." - # Use file: migration (native QEMU) instead of exec: to avoid relying on a - # spawned shell finding zstd in PATH. Compressed as a separate host step - # after migrate completes. - local migrate_cmd - migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") - local migrate_resp - migrate_resp=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' "$migrate_cmd" - } | qmp_session "$sock") || { - err "QMP migrate failed" - return 1 - } - if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then - err "QMP migrate returned error: $migrate_resp" - return 1 - fi - - # Poll migration status. Migration runs in the background after the - # migrate command returns; we watch for "completed" or "failed". 
- local migrate_timeout=600 - local waited=0 - local last_heartbeat=0 - while [ "$waited" -lt "$migrate_timeout" ]; do - local status_line status - status_line=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"query-migrate"}' - } | qmp_session "$sock" 2>/dev/null || true) - status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')" - case "$status" in - completed) - log " QMP: migrate completed (${waited}s)" - break - ;; - failed|cancelled) - err " QMP: migrate ended with status=$status" - err " QMP response: $status_line" - return 1 - ;; - active|setup|device|"") - # still running - if [ "$((waited - last_heartbeat))" -ge 30 ]; then - local transferred - transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') - log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})" - last_heartbeat=$waited - fi - ;; - *) - log " QMP: migrate status=$status (${waited}s)" - ;; - esac - sleep 2 - waited=$((waited + 2)) - done - - if [ "$waited" -ge "$migrate_timeout" ]; then - err "QMP migrate timed out after ${migrate_timeout}s" - err "Last query-migrate response: $({ - printf '%s\n' '{\"execute\":\"qmp_capabilities\"}' - printf '%s\n' '{\"execute\":\"query-migrate\"}' - } | qmp_session "$sock" 2>/dev/null || true)" - return 1 - fi - - log " QMP: quitting VM..." - { - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"quit"}' - } | qmp_session "$sock" >/dev/null || true - - return 0 -} +# qmp_session() and capture_vm_state() live in common.sh; both build-image.sh +# (CI) and run-emulator.sh (stack emulator pull local capture) call them. 
build_one() { local arch="$1" diff --git a/docker/local-emulator/qemu/common.sh b/docker/local-emulator/qemu/common.sh index 1e3374dad4..38385e308b 100755 --- a/docker/local-emulator/qemu/common.sh +++ b/docker/local-emulator/qemu/common.sh @@ -68,3 +68,142 @@ make_iso_from_dir() { exit 1 fi } + +# Send one or more QMP commands over the monitor socket. Stdin is a stream of +# JSON objects; qmp_capabilities is always sent first to exit negotiation mode. +# Keep stdin open briefly after writing so socat doesn't close before QEMU +# responds — QMP replies in milliseconds so 0.5s is plenty. +# +# Callers: build-image.sh capture flow, run-emulator.sh cmd_capture. +qmp_session() { + local sock="$1" + local payload + payload="$(cat)" + ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t30 - "UNIX-CONNECT:${sock}" +} + +# Drive the snapshot capture over QMP: +# 1. qmp_capabilities — exit negotiation mode. +# 2. stop — pause the VM so no more disk writes happen. +# 3. migrate-set-capabilities — enable mapped-ram + multifd for fast resume. +# 4. migrate to file: — streams RAM/device state out. +# 5. Poll query-migrate until status=completed (or failed). +# 6. quit — terminate QEMU cleanly. +# +# Depends on log/err/warn being defined by the sourcing script. +capture_vm_state() { + local sock="$1" + local guest_path="$2" + + if [ ! -S "$sock" ]; then + err "QMP monitor socket missing: $sock" + return 1 + fi + + log " QMP: stopping VM..." + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"stop"}' + } | qmp_session "$sock" >/dev/null || { + err "QMP stop failed" + return 1 + } + + log " QMP: enabling mapped-ram + multifd for fast resume..." + # mapped-ram: writes each RAM page to a fixed offset in the output file + # (vs the legacy streamed format). This lets the target QEMU mmap the file + # and fault pages lazily — and combined with multifd, load RAM in parallel. 
+ # multifd-channels=4 matches our pinned SMP so the channels don't starve + # each other on the target's 4 vCPUs. + local caps_cmd params_cmd + caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' + params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' + local setup_resp + setup_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$caps_cmd" + printf '%s\n' "$params_cmd" + } | qmp_session "$sock") || { + err "QMP capabilities setup failed" + return 1 + } + if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP capabilities returned error: $setup_resp" + return 1 + fi + + log " QMP: migrating RAM state to ${guest_path}..." + # Use file: migration (native QEMU) instead of exec: to avoid relying on a + # spawned shell finding zstd in PATH. Compressed as a separate host step + # after migrate completes. + local migrate_cmd + migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") + local migrate_resp + migrate_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$migrate_cmd" + } | qmp_session "$sock") || { + err "QMP migrate failed" + return 1 + } + if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP migrate returned error: $migrate_resp" + return 1 + fi + + # Poll migration status. Migration runs in the background after the + # migrate command returns; we watch for "completed" or "failed". 
+ local migrate_timeout=600
+ local waited=0
+ local last_heartbeat=0
+ while [ "$waited" -lt "$migrate_timeout" ]; do
+ local status_line status
+ status_line=$({
+ printf '%s\n' '{"execute":"qmp_capabilities"}'
+ printf '%s\n' '{"execute":"query-migrate"}'
+ } | qmp_session "$sock" 2>/dev/null || true)
+ status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')"
+ case "$status" in
+ completed)
+ log " QMP: migrate completed (${waited}s)"
+ break
+ ;;
+ failed|cancelled)
+ err " QMP: migrate ended with status=$status"
+ err " QMP response: $status_line"
+ return 1
+ ;;
+ active|setup|device|"")
+ # still running
+ if [ "$((waited - last_heartbeat))" -ge 30 ]; then
+ local transferred
+ transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/')
+ log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})"
+ last_heartbeat=$waited
+ fi
+ ;;
+ *)
+ log " QMP: migrate status=$status (${waited}s)"
+ ;;
+ esac
+ sleep 2
+ waited=$((waited + 2))
+ done
+
+ if [ "$waited" -ge "$migrate_timeout" ]; then
+ err "QMP migrate timed out after ${migrate_timeout}s"
+ err "Last query-migrate response: $({
+ printf '%s\n' '{"execute":"qmp_capabilities"}'
+ printf '%s\n' '{"execute":"query-migrate"}'
+ } | qmp_session "$sock" 2>/dev/null || true)"
+ return 1
+ fi
+
+ log " QMP: quitting VM..." 
+ { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"quit"}' + } | qmp_session "$sock" >/dev/null || true + + return 0 +} diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 72f095cf57..aba9311b04 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -21,6 +21,13 @@ EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}" # in place — acceptable for tests and CI that don't reach the emulator over # a shared network. Shaves ~2-3s off `emulator start`. EMULATOR_NO_ROTATION="${EMULATOR_NO_ROTATION:-0}" +# Internal: set to 1 by cmd_capture to build QEMU with the snapshot-compatible +# device layout (phantom ISOs, no virtfs, pcie-root-port, pinned 4096MB/4CPU) +# without the `-incoming defer` that resume mode adds. The captured snapshot +# must be byte-compatible with what the resume path will later feed to QEMU. +EMULATOR_CAPTURING_SNAPSHOT="${EMULATOR_CAPTURING_SNAPSHOT:-0}" +# Force re-capture even if a .savevm.zst is already present. +EMULATOR_FORCE_CAPTURE="${EMULATOR_FORCE_CAPTURE:-0}" # Fixed host-side ports for the QEMU emulator (267xx range). # Only user-facing services are exposed; internal deps stay inside the VM. @@ -87,7 +94,15 @@ runtime_iso_path() { } snapshot_available() { - [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] + [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ "$EMULATOR_CAPTURING_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] +} + +# True when QEMU must use the snapshot-compatible device layout — either to +# resume from an existing snapshot or to capture a new one. Resume adds +# `-incoming defer`; capture does not. Everything else (phantom ISOs, no +# virtfs, pcie-root-port, pinned RAM/SMP) matches. 
+snapshot_layout() { + snapshot_available || [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ] } # Ensure the decompressed mapped-ram cache is up-to-date with the shipped @@ -163,6 +178,16 @@ ensure_runtime_config_iso() { # emulator:start`) rather than through the stack-cli, which generates the # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume # label so the guest's render-stack-env mounts it the same way. + write_runtime_config_iso "$VM_DIR" +} + +# Write a STACKCFG runtime-config.iso containing runtime.env + base.env. +# The VM_DIR_HOST arg is the path to publish internal-pck / stack.log to on +# /host; pass empty string to suppress publication (used by capture mode +# where /host isn't mounted — virtfs is detached for snapshot compatibility, +# so any host-side write would fail and restart-loop stack.service). +write_runtime_config_iso() { + local vm_dir_host="$1" local base_env="$SCRIPT_DIR/../.env.development" if [ ! -f "$base_env" ]; then err "Cannot generate runtime config ISO: $base_env is missing." @@ -179,10 +204,10 @@ ensure_runtime_config_iso() { printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" - printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" + printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$vm_dir_host" } > "$cfg_dir/runtime.env" cp "$base_env" "$cfg_dir/base.env" - make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" + make_iso_from_dir "$(runtime_iso_path)" "STACKCFG" "$cfg_dir" } service_is_up() { @@ -259,13 +284,14 @@ build_qemu_cmd() { local current_fp current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")" - if snapshot_available; then + if snapshot_layout; then # The savevm RAM state was captured against the base image's exact disk # state. 
An overlay with writes from a previous session diverges from # that point, so -incoming would resume RAM against inconsistent disk. # Always start from a fresh overlay in the snapshot path; per-session # state is not preserved. Users who want persistence can opt out with - # EMULATOR_NO_SNAPSHOT=1. + # EMULATOR_NO_SNAPSHOT=1. Capture mode also needs a clean overlay so the + # snapshot we write is taken against the base's known disk state. if [ -f "$VM_DIR/disk.qcow2" ]; then rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" fi @@ -332,12 +358,16 @@ build_qemu_cmd() { # exist so the migration replay matches device IDs. Runtime-only devices # (virtfs, balloon) live at higher slots — extra at destination is fine. local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM" - if snapshot_available; then - log "Snapshot found at $savevm_file — fast-resume enabled." - # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. - # We use that to set mapped-ram + multifd capabilities before loading, - # which enables parallel RAM restore (~2-3x faster than streamed decode). - snapshot_args+=(-incoming defer) + if snapshot_layout; then + if snapshot_available; then + log "Snapshot found at $savevm_file — fast-resume enabled." + # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. + # We use that to set mapped-ram + multifd capabilities before loading, + # which enables parallel RAM restore (~2-3x faster than streamed decode). + snapshot_args+=(-incoming defer) + else + log "Capture mode: booting with snapshot-compatible layout (no -incoming)." + fi snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" # RAM size is baked into the snapshot; migration replay requires an # identical -m value. 
Pin to the build-time RAM (4096) and ignore @@ -374,7 +404,7 @@ build_qemu_cmd() { ) fi - if snapshot_available; then + if snapshot_layout; then QEMU_CMD=( "$qemu_bin" -machine "$machine" @@ -406,7 +436,7 @@ build_qemu_cmd() { # via human-monitor-command (errors come back as a return string, # not a QMP error). -fsdev "local,id=hostfs,path=/,security_model=none" - "${snapshot_args[@]}" + ${snapshot_args[@]+"${snapshot_args[@]}"} -serial "file:$VM_DIR/serial.log" -display none -daemonize @@ -842,6 +872,100 @@ cmd_reset() { log "Emulator state reset. Next start will be a fresh boot." } +# Cold-boot the VM with the snapshot-compatible device layout, wait for all +# services to be healthy, then capture a snapshot via QMP migrate and compress +# it to .savevm.zst. Called by `stack emulator pull` so first-run users get a +# fast-resume snapshot that's guaranteed compatible with their host's QEMU +# version + accelerator (which CI-built snapshots can't guarantee across +# KVM/HVF/TCG). +cmd_capture() { + if [ ! -f "$(image_path)" ]; then + err "Missing qcow2: $(image_path). Run 'stack emulator pull' first." + exit 1 + fi + if [ -s "$(savevm_path)" ] && [ "$EMULATOR_FORCE_CAPTURE" != "1" ]; then + log "Snapshot already present at $(savevm_path); skipping capture." + log "Pass EMULATOR_FORCE_CAPTURE=1 to rebuild it." + return 0 + fi + if is_running; then + err "Emulator is already running; stop it first (stack emulator stop)." + exit 1 + fi + + # Start with a clean slate if we're force-recapturing; stale raw/zst would + # otherwise make snapshot_available() return true and flip QEMU into + # -incoming defer mode. + rm -f "$(savevm_path)" "$(savevm_raw_path)" + + ensure_ports_free + mkdir -p "$RUN_DIR" "$VM_DIR" + # Regenerate runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty — + # virtfs is detached in capture mode, so run-stack-container's + # `install internal-pck → /host/$VM_DIR_HOST/...` would fail and restart-loop + # stack.service. 
Mirrors build-image.sh's CI runtime.env shape. + rm -f "$(runtime_iso_path)" + write_runtime_config_iso "" + + info "Cold-booting VM to capture local snapshot (one-time, ~1-3 min)..." + EMULATOR_CAPTURING_SNAPSHOT=1 + start_vm + info "VM: 4096MB / 4 CPUs (pinned for snapshot compatibility)" + + # Cold boot with snapshot-compatible layout drops virtfs, so stack.service + # starts without /host mounted — fine for capture; hostfs is hot-plugged on + # resume via qmp_hotplug_9p. + if ! wait_for_condition "all services" "$READY_TIMEOUT" all_ready; then + tail_vm_logs + stop_vm + err "Services did not come up; capture aborted." + exit 1 + fi + + local raw tmp_raw zst tmp_zst + raw="$(savevm_raw_path)" + tmp_raw="${raw}.capture.tmp" + zst="$(savevm_path)" + tmp_zst="${zst}.capture.tmp" + rm -f "$tmp_raw" "$tmp_zst" + + log "Capturing VM state via QMP (mapped-ram + multifd)..." + if ! capture_vm_state "$VM_DIR/monitor.sock" "$tmp_raw"; then + err "QMP capture failed." + stop_vm + exit 1 + fi + + # capture_vm_state sent QMP quit; wait for QEMU to exit, then clean sockets. + local waited=0 + while [ "$waited" -lt 30 ] && is_running; do + sleep 1 + waited=$((waited + 1)) + done + if is_running; then + warn "QEMU did not exit after QMP quit; forcing." + stop_vm + fi + rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" + + if [ ! -s "$tmp_raw" ]; then + err "Captured raw file is empty: $tmp_raw" + exit 1 + fi + + log "Compressing snapshot with zstd..." + zstd -1 -T0 -f -o "$tmp_zst" "$tmp_raw" + mv "$tmp_zst" "$zst" + # Keep the uncompressed file too — resume reads it directly via mapped-ram, + # and ensure_savevm_raw skips re-decompression when the raw's mtime >= zst's. 
+ mv "$tmp_raw" "$raw" + touch -r "$zst" "$raw" + + local size + size="$(du -h "$zst" | cut -f1)" + log "Snapshot captured: $zst (${size})" +} + STATUS_FAILED=0 print_service_status() { @@ -889,12 +1013,12 @@ ACTION="start" while [[ $# -gt 0 ]]; do case "$1" in - start|stop|reset|status|bench) + start|stop|reset|status|bench|capture) ACTION="$1" shift ;; *) - echo "Usage: $0 [start|stop|reset|status|bench]" + echo "Usage: $0 [start|stop|reset|status|bench|capture]" exit 1 ;; esac @@ -906,4 +1030,5 @@ case "$ACTION" in reset) cmd_reset ;; status) cmd_status ;; bench) cmd_bench ;; + capture) cmd_capture ;; esac diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 942b436cd7..f9d6a6aae4 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -239,6 +239,9 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); await pullRelease(arch); + // Capture now so this and all subsequent starts resume fast. Skipping it + // would cold-boot today plus every future start (we never auto-capture). + await captureLocalSnapshot(arch); } prepareRuntimeConfigIso(); await runEmulator("start", { EMULATOR_ARCH: arch }); @@ -261,25 +264,26 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc mkdirSync(imageDir, { recursive: true }); const diskAsset = `stack-emulator-${arch}.qcow2`; - // The savevm file enables the fast-resume path in run-emulator.sh. It's - // optional — older releases may not have it and the runtime cleanly falls - // back to a cold boot. - const snapshotAsset = `stack-emulator-${arch}.savevm.zst`; const release = await ghApi(`/repos/${repo}/releases/tags/${tag}`); const diskMatch = release.assets.find((a) => a.name === diskAsset); if (!diskMatch) { throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); } - const snapshotMatch = release.assets.find((a) => a.name === snapshotAsset); const token = githubToken(); - await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag); - if (snapshotMatch) { - await downloadReleaseAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); - } else { - console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); - } +} + +// Cold-boot the VM, wait for services, capture a snapshot via QMP, compress, +// stop. Runs once per qcow2 download so subsequent `stack emulator start`s +// resume in ~3-8s. Snapshots are always captured on the user's own machine +// because QEMU migration state isn't portable across accelerators +// (KVM/HVF/TCG) or `-cpu max` feature sets. +async function captureLocalSnapshot(arch: "arm64" | "amd64"): Promise { + preflightForVmStart("pull", arch); + prepareRuntimeConfigIso(); + console.log("Capturing local snapshot (first-time, ~1-3 min cold boot + capture)..."); + await runEmulator("capture", { EMULATOR_ARCH: arch }); } async function downloadReleaseAsset( @@ -491,19 +495,20 @@ export function registerEmulatorCommand(program: Command) { emulator .command("pull") - .description("Download an emulator image from GitHub Releases or a PR build") + .description("Download an emulator image from GitHub Releases or a PR build, then capture a local fast-start snapshot") .option("--arch ", "Target architecture (default: current system arch)") .option("--branch ", "Release branch (default: dev)") .option("--tag ", "Specific release tag (default: latest)") .option("--repo ", "GitHub repository (default: stack-auth/stack-auth)") .option("--pr ", "Pull from a PR's CI artifacts") .option("--run ", "Pull from a specific workflow run's artifacts") - .action(async (opts) => { + .option("--skip-snapshot", "Download only the qcow2; skip the one-time local snapshot capture") + .action(async (opts: 
{ arch?: string, repo?: string, branch?: string, tag?: string, pr?: string, run?: string, skipSnapshot?: boolean }) => { const arch = resolveArch(opts.arch); const repo = opts.repo ?? DEFAULT_REPO; if (opts.run || opts.pr) { - let runId = opts.run as string | undefined; + let runId = opts.run; if (!runId) { console.log(`Finding latest successful build for PR #${opts.pr}...`); const pr = await ghApi(`/repos/${repo}/pulls/${opts.pr}`); @@ -521,22 +526,33 @@ export function registerEmulatorCommand(program: Command) { mkdirSync(imageDir, { recursive: true }); const dest = join(imageDir, `stack-emulator-${arch}.qcow2`); const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); + const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`); if (existsSync(dest)) unlinkSync(dest); + // Stale snapshots from a previous pull would resume against the new + // qcow2 and crash; wipe them so capture rebuilds cleanly. if (existsSync(snapshotDest)) unlinkSync(snapshotDest); + if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest); const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir); if (!downloaded) { throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`); } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - if (existsSync(snapshotDest)) { - console.log(`Downloaded: ${snapshotDest}`); - } else { - console.log(`Snapshot not present in artifact for run ${runId}; fast-start disabled.`); - } } else { + // Same stale-snapshot concern as the PR branch above. 
+ const imageDir = emulatorImageDir(); + const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); + const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`); + if (existsSync(snapshotDest)) unlinkSync(snapshotDest); + if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest); await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } + + if (opts.skipSnapshot) { + console.log("--skip-snapshot: not capturing a local snapshot. First `stack emulator start` will cold-boot."); + } else { + await captureLocalSnapshot(arch); + } }); emulator From 7db9fe405e009d9708e8415d12b62cf2b5cdcb89 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 11:48:34 -0700 Subject: [PATCH 19/25] fix CI verify step: use freshly-built qcow2 via STACK_EMULATOR_HOME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack-cli ignores EMULATOR_IMAGE_DIR / EMULATOR_RUN_DIR and derives its own paths from STACK_EMULATOR_HOME (or $HOME/.stack/emulator as default). Without STACK_EMULATOR_HOME set, `emulator start` in CI was silently auto-pulling from the previous `emulator-dev-latest` release instead of using the just-built qcow2 + savevm under ${workspace}. Before: auto-pull got qcow2 + savevm.zst → fast-resume (worked, but tested the PREVIOUS build, not the current one). After my capture-locally change: auto-pull only gets qcow2 → captureLocalSnapshot kicks off a full cold-boot + capture of a stale image, which is slow and can hang in CI. Point STACK_EMULATOR_HOME at the workspace so the CLI finds the just-built images and goes straight to snapshot-resume. Update the smoke-test job's copy step to target STACK_EMULATOR_HOME so both paths stay consistent. 
--- .github/workflows/qemu-emulator-build.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 380a8ab1fc..eaf3ee83e9 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -22,8 +22,14 @@ concurrency: env: EMULATOR_IMAGE_NAME: stack-local-emulator + # Shell scripts (build-image.sh, run-emulator.sh) read these directly. EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run + # The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths + # from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator + # start` finds the freshly-built qcow2 + savevm from build-image.sh + # (snapshot-resume, fast) instead of auto-pulling from a prior release. + STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu jobs: build: @@ -260,9 +266,9 @@ jobs: - name: Place qcow2 into STACK_EMULATOR_HOME layout run: | - mkdir -p "$HOME/.stack/emulator/images" - cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" - ls -lh "$HOME/.stack/emulator/images/" + mkdir -p "$STACK_EMULATOR_HOME/images" + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$STACK_EMULATOR_HOME/images/" + ls -lh "$STACK_EMULATOR_HOME/images/" # No savevm.zst artifact (users capture locally via `emulator pull`), # so `emulator start` cold-boots the qcow2. Budget accordingly. 
@@ -293,7 +299,7 @@ jobs: - name: Print serial log on failure if: failure() - run: tail -100 $HOME/.stack/emulator/run/vm/serial.log 2>/dev/null || true + run: tail -100 "$STACK_EMULATOR_HOME/run/vm/serial.log" 2>/dev/null || true publish: name: Publish to GitHub Releases From 510ef380157edf747e811a27fd64776fbe683651 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 12:31:53 -0700 Subject: [PATCH 20/25] fix PCI slot mismatch in snapshot capture + stale runtime ISO on direct start - build-image.sh: move runtime.iso drive before netdev so its virtio-blk slot precedes virtio-net-pci, matching run-emulator.sh's resume argv. Previously migrate-incoming against CI's savevm hit a device-tree mismatch and only looked green because snapshot_fallback_to_cold_boot silently retried as cold boot. - run-emulator.sh: drop early-return in ensure_runtime_config_iso so PORT_PREFIX/EMULATOR_*_PORT changes take effect on every start; the preserved ISO from a prior run would otherwise silently override the host-forward ports picked up by QEMU's netdev. - common.sh: fix backslash-escaped JSON in capture_vm_state's migrate- timeout diagnostic; single-quoted printf was emitting literal backslashes, so QMP replied with a parse error instead of the real query-migrate status. 
--- docker/local-emulator/qemu/build-image.sh | 13 ++++++++--- docker/local-emulator/qemu/common.sh | 4 ++-- docker/local-emulator/qemu/run-emulator.sh | 25 +++++++++------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 26f4765165..36f76d99f4 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -333,19 +333,25 @@ build_one() { local monitor_sock="$tmp_dir/monitor.sock" local qga_sock="$tmp_dir/qga.sock" local snapshot_args=() + local runtime_disk_args=() local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # STACKCFG runtime ISO lets stack.service start during the build — same + # disk shape render-stack-env expects at runtime. Placed before netdev + # so its virtio-blk PCI slot precedes virtio-net-pci, matching the + # resume argv order in run-emulator.sh (slots must line up or + # migrate-incoming fails the device-tree check). + runtime_disk_args=( + -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + ) # QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec # inside the guest post-resume (only needed at runtime but harmless here). - # STACKCFG runtime ISO lets stack.service start during the build — same - # disk shape render-stack-env expects at runtime. snapshot_args=( -chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off" -mon "chardev=monitor,mode=control" -chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0" -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" - -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. # The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug # only works through a root port. 
Must be present at snapshot capture @@ -367,6 +373,7 @@ build_one() { -drive "file=$tmp_img,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap" \ -drive "file=$seed_iso,format=raw,if=virtio,readonly=on" \ -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ + ${runtime_disk_args[@]+"${runtime_disk_args[@]}"} \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ ${virtfs_args[@]+"${virtfs_args[@]}"} \ diff --git a/docker/local-emulator/qemu/common.sh b/docker/local-emulator/qemu/common.sh index 38385e308b..f5d3392d9d 100755 --- a/docker/local-emulator/qemu/common.sh +++ b/docker/local-emulator/qemu/common.sh @@ -193,8 +193,8 @@ capture_vm_state() { if [ "$waited" -ge "$migrate_timeout" ]; then err "QMP migrate timed out after ${migrate_timeout}s" err "Last query-migrate response: $({ - printf '%s\n' '{\"execute\":\"qmp_capabilities\"}' - printf '%s\n' '{\"execute\":\"query-migrate\"}' + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' } | qmp_session "$sock" 2>/dev/null || true)" return 1 fi diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index aba9311b04..0845ff1539 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -168,16 +168,12 @@ runtime_fingerprint() { } ensure_runtime_config_iso() { - local cfg_iso - cfg_iso="$(runtime_iso_path)" - if [ -s "$cfg_iso" ]; then - return 0 - fi - - # Fallback used when this script is invoked directly (e.g. `pnpm - # emulator:start`) rather than through the stack-cli, which generates the - # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume - # label so the guest's render-stack-env mounts it the same way. + # Regenerate unconditionally: port env vars (PORT_PREFIX, EMULATOR_*_PORT) + # may have changed since the last run, and an ISO cached from a prior + # invocation would silently override them. 
The stack-cli path writes the + # ISO first via packages/stack-cli/src/lib/iso.ts; this re-write produces + # the same content for that flow (same field set + volume label) and is + # cheap enough (~ms) to run on every start. write_runtime_config_iso "$VM_DIR" } @@ -740,10 +736,9 @@ stop_vm() { fi fi rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log" - # Do NOT remove runtime-config.iso: the CLI owns its lifecycle and run-emulator.sh - # cannot regenerate it. Removing here breaks the snapshot → cold-boot fallback - # (which calls stop_vm before recursing into cmd_start → ensure_runtime_config_iso). - # `cmd_reset` wipes $RUN_DIR entirely when a full reset is wanted. + # runtime-config.iso is left in place; ensure_runtime_config_iso regenerates + # it on the next start. `cmd_reset` wipes $RUN_DIR entirely when a full reset + # is wanted. } cmd_start() { @@ -854,7 +849,7 @@ snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one. - # runtime-config.iso is preserved by stop_vm (the CLI owns it). + # runtime-config.iso is regenerated by ensure_runtime_config_iso on recursion. rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 From 39b5c083e704291a950678ccb91a796a06c5357c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 13:56:33 -0700 Subject: [PATCH 21/25] fix smoke test: skip shell ISO regen when CLI already wrote it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensure_runtime_config_iso unconditionally fell through to make_iso_from_dir, which still required hdiutil/mkisofs/genisoimage — the host dep the lib/iso.ts TS writer was supposed to remove. The Smoke Test job doesn't install genisoimage, so emulator start failed. 
CLI now sets STACK_EMULATOR_CLI_WROTE_ISO=1 and the shell short-circuits when that flag plus a non-empty ISO are present. --- docker/local-emulator/qemu/run-emulator.sh | 18 ++++++++++++------ packages/stack-cli/src/commands/emulator.ts | 6 +++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 0845ff1539..088d08a9f0 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -168,12 +168,18 @@ runtime_fingerprint() { } ensure_runtime_config_iso() { - # Regenerate unconditionally: port env vars (PORT_PREFIX, EMULATOR_*_PORT) - # may have changed since the last run, and an ISO cached from a prior - # invocation would silently override them. The stack-cli path writes the - # ISO first via packages/stack-cli/src/lib/iso.ts; this re-write produces - # the same content for that flow (same field set + volume label) and is - # cheap enough (~ms) to run on every start. + # When invoked via stack-cli, the CLI writes the runtime ISO natively + # (packages/stack-cli/src/lib/iso.ts) immediately before spawning us and + # sets STACK_EMULATOR_CLI_WROTE_ISO=1. Trust it and skip regeneration — + # otherwise we'd fall through to make_iso_from_dir and require + # hdiutil/mkisofs/genisoimage, which is exactly the host dep the CLI path + # is designed to remove. + if [ "$STACK_EMULATOR_CLI_WROTE_ISO" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + return 0 + fi + # Direct-shell invocation path: regenerate unconditionally. Port env vars + # (PORT_PREFIX, EMULATOR_*_PORT) may have changed since the last run, and + # an ISO cached from a prior invocation would silently override them. 
write_runtime_config_iso "$VM_DIR" } diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index f9d6a6aae4..3a67d26c07 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -244,7 +244,11 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { await captureLocalSnapshot(arch); } prepareRuntimeConfigIso(); - await runEmulator("start", { EMULATOR_ARCH: arch }); + // Signal to run-emulator.sh that runtime-config.iso was written by the CLI + // via lib/iso.ts; the shell's ensure_runtime_config_iso should trust it and + // skip its own regeneration (which would otherwise require the + // hdiutil/mkisofs/genisoimage host dep the TS writer replaces). + await runEmulator("start", { EMULATOR_ARCH: arch, STACK_EMULATOR_CLI_WROTE_ISO: "1" }); } export function resolveArch(raw?: string): "arm64" | "amd64" { From 7acb3ed20b541b6ff0d66e702866a9dfc9405421 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 15:30:46 -0700 Subject: [PATCH 22/25] fix capture path: guard against set -u + preserve cmd_capture's empty-host ISO Two bugs surfaced by end-to-end testing against a freshly-built qcow2: 1. $STACK_EMULATOR_CLI_WROTE_ISO was referenced unguarded under `set -u`, so any code path that didn't set it (capture, direct-shell) tripped `unbound variable` before reaching the early-return. Use :- default. 2. ensure_runtime_config_iso was overwriting cmd_capture's specialized empty-VM_DIR_HOST ISO with the host-dir variant. Since virtfs is detached in capture mode, run-stack-container then tried to publish internal-pck to /host/... and restart-looped stack.service, so no service ever became healthy and capture aborted after 240s. Previously masked by snapshot_fallback_to_cold_boot; 510ef3801 fixed the fallback mask and exposed this. Skip regen when EMULATOR_CAPTURING_SNAPSHOT=1. 
--- docker/local-emulator/qemu/run-emulator.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 088d08a9f0..7b52436e19 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -174,7 +174,15 @@ ensure_runtime_config_iso() { # otherwise we'd fall through to make_iso_from_dir and require # hdiutil/mkisofs/genisoimage, which is exactly the host dep the CLI path # is designed to remove. - if [ "$STACK_EMULATOR_CLI_WROTE_ISO" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + if [ "${STACK_EMULATOR_CLI_WROTE_ISO:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + return 0 + fi + # In capture mode, cmd_capture already wrote a specialized ISO with an + # empty STACK_EMULATOR_VM_DIR_HOST — required because virtfs is detached + # for snapshot compatibility, and run-stack-container would otherwise + # try to publish internal-pck to /host/... and restart-loop + # stack.service. Trust that write and don't overwrite it. + if [ "${EMULATOR_CAPTURING_SNAPSHOT:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then return 0 fi # Direct-shell invocation path: regenerate unconditionally. Port env vars From 8f9b9c1046ced8b24fbee9880329c5e1a7621db0 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 10:40:42 -0700 Subject: [PATCH 23/25] emulator build: split snapshot-bake from savevm capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits EMULATOR_BUILD_SNAPSHOT into two independent flags: * EMULATOR_BUILD_SNAPSHOT (default 1) — bake placeholder PCK/SSK/SAK/ CRON_SECRET into the qcow2 so runtime rotate-secrets can swap them per install. Cheap; no extra wall time. * EMULATOR_CAPTURE_SAVEVM (default 0) — start the stack, wait for backend+dashboard health, then capture savevm.zst via QMP. Implies BUILD_SNAPSHOT. 
CI never captures (snapshots aren't portable across KVM/HVF/TCG; users capture locally on first `stack emulator pull`). The previous default of capturing in CI was wasted work on amd64 and made arm64 fail — wait-for-stack-ready couldn't bring the stack up under cross-arch TCG inside its 600s budget, so cloud-final.service was marked failed. --- .github/workflows/qemu-emulator-build.yaml | 6 +- docker/local-emulator/qemu/build-image.sh | 94 +++++++++++-------- .../qemu/cloud-init/emulator/user-data | 11 ++- 3 files changed, 69 insertions(+), 42 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index eaf3ee83e9..5792b7f600 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -27,8 +27,10 @@ env: EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run # The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths # from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator - # start` finds the freshly-built qcow2 + savevm from build-image.sh - # (snapshot-resume, fast) instead of auto-pulling from a prior release. + # start` finds the freshly-built qcow2 from build-image.sh and cold-boots + # it, instead of auto-pulling from a prior release. CI doesn't capture a + # savevm (EMULATOR_CAPTURE_SAVEVM defaults to 0); users capture locally + # on first `stack emulator pull`. STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu jobs: diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 36f76d99f4..741d60d029 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -12,22 +12,34 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" DEBIAN_VERSION="${DEBIAN_VERSION:-13}" DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}" RAM="${EMULATOR_BUILD_RAM:-4096}" -# Snapshot mode pins SMP to a fixed value so the runtime QEMU command (which -# uses EMULATOR_CPUS, default 4) can match the source device topology — RAM -# migration replay requires identical vCPU count. -if [ "${EMULATOR_BUILD_SNAPSHOT:-1}" = "1" ]; then +PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" +EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" +# Snapshot-ready qcow2: bake deterministic placeholder secrets (PCK/SSK/SAK/ +# CRON_SECRET) into the image so runtime `rotate-secrets` can swap them for +# fresh per-install values on every `emulator start`. Without this, the image +# would ship with random shared secrets — a security regression. Cheap to +# build (no extra wall-clock cost in CI), so it stays on by default. +EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" +# Capture RAM/device state via QMP at build time, producing a +# `stack-emulator-.savevm.zst` next to the qcow2. Off by default — +# users capture locally on first `stack emulator pull` (run-emulator.sh +# capture) because migration state isn't portable across accelerators +# (KVM/HVF/TCG) or `-cpu max` feature sets, so a CI-captured snapshot +# couldn't resume reliably on arbitrary user hardware. Implies +# EMULATOR_BUILD_SNAPSHOT=1. +EMULATOR_CAPTURE_SAVEVM="${EMULATOR_CAPTURE_SAVEVM:-0}" +if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ] && [ "$EMULATOR_BUILD_SNAPSHOT" != "1" ]; then + echo "EMULATOR_CAPTURE_SAVEVM=1 requires EMULATOR_BUILD_SNAPSHOT=1" >&2 + exit 1 +fi +# Capture mode pins SMP to a fixed value so the resume QEMU command (which +# uses EMULATOR_CPUS, default 4) can match the captured device topology — +# RAM migration replay requires identical vCPU count. 
+if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then CPUS="${EMULATOR_BUILD_CPUS:-4}" else CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" fi -PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" -EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" -# Snapshot build mode: bring the VM to a fully-warm state (backend + dashboard -# responding), then capture RAM/device state via QMP so that `emulator start` -# can -incoming from it and return in ~3-8s. Enabled by default; set -# EMULATOR_BUILD_SNAPSHOT=0 to fall back to the legacy "shutdown after -# provisioning" flow. -EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" RED='\033[0;31m' GREEN='\033[0;32m' @@ -67,7 +79,7 @@ check_deps() { command -v docker >/dev/null 2>&1 || missing+=("docker") fi - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then for cmd in socat zstd; do command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done @@ -297,33 +309,41 @@ build_one() { cp "$bundle_tgz" "$bundle_dir/img.tgz" cp "$BUILD_ENV_FILE" "$bundle_dir/build.env" if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then - # Guest reads this flag to use placeholder secrets and to wait at the end - # of provision-build for the host to snapshot the RAM state. + # Guest reads this flag to use deterministic placeholder secrets so that + # runtime rotate-secrets can swap them out per-install. printf 'STACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env" fi + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then + # Guest reads this flag to start stack.service during provision-build, + # wait for backend+dashboard health, then block forever waiting for the + # host to capture VM state via QMP (stop + migrate + quit). + printf 'STACK_EMULATOR_CAPTURE_SAVEVM=1\n' >> "$bundle_dir/build.env" + fi # Tell the guest which arch it's being built for so cross-arch (TCG) builds # can skip the smoke test, which isn't reliable under software emulation. 
printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env" make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir" - # render-stack-env (inside the guest) mounts a STACKCFG disk containing - # runtime.env + base.env. At runtime the host-side run-emulator.sh builds - # this ISO; at build time stack.service also starts the container, so we - # must provide the same shape here. Values mirror the defaults the runtime - # would supply — port-prefix 81 and matching host-port numbers (unused at - # build time since nothing is port-forwarded, but render-stack-env embeds - # them into /run/stack-auth/local-emulator.env). - mkdir -p "$runtime_cfg_dir" - { - printf 'STACK_EMULATOR_PORT_PREFIX=81\n' - printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' - printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' - printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' - printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' - printf 'STACK_EMULATOR_VM_DIR_HOST=\n' - } > "$runtime_cfg_dir/runtime.env" - cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" - make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then + # render-stack-env (inside the guest) mounts a STACKCFG disk containing + # runtime.env + base.env. At runtime the host-side run-emulator.sh builds + # this ISO; in capture mode stack.service also starts during the build, + # so we must provide the same shape here. Values mirror the defaults the + # runtime would supply — port-prefix 81 and matching host-port numbers + # (unused at build time since nothing is port-forwarded, but + # render-stack-env embeds them into /run/stack-auth/local-emulator.env). 
+ mkdir -p "$runtime_cfg_dir" + { + printf 'STACK_EMULATOR_PORT_PREFIX=81\n' + printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' + printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' + printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' + printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' + printf 'STACK_EMULATOR_VM_DIR_HOST=\n' + } > "$runtime_cfg_dir/runtime.env" + cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" + make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + fi : > "$serial_log" : > "$provision_log" @@ -335,7 +355,7 @@ build_one() { local snapshot_args=() local runtime_disk_args=() local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then # STACKCFG runtime ISO lets stack.service start during the build — same # disk shape render-stack-env expects at runtime. Placed before netdev # so its virtio-blk PCI slot precedes virtio-net-pci, matching the @@ -360,7 +380,7 @@ build_one() { ) # QEMU disallows migration when virtfs is mounted in the guest — virtfs # has guest-side state (open handles, mount table) that isn't migratable. - # Drop the host fs mount in snapshot mode; STACK_SERVICES_READY still + # Drop the host fs mount in capture mode; STACK_SERVICES_READY still # arrives on the serial log so contains_provision_marker can detect it. 
virtfs_args=() fi @@ -385,7 +405,7 @@ build_one() { pid="$(cat "$pidfile")" local ready_marker="STACK_CLOUD_INIT_DONE" - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then ready_marker="STACK_SERVICES_READY" fi elapsed=0 @@ -450,7 +470,7 @@ build_one() { exit 1 fi - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst" local savevm_raw="$tmp_dir/state.raw" local savevm_tmp="$tmp_dir/state.zst" diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index b3c21527b4..6ec0976192 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -734,13 +734,18 @@ write_files: log_provision "starting slim-docker-image" bash /usr/local/bin/slim-docker-image - # Snapshot-build mode: bring the stack container up, wait for full + # Capture mode: bring the stack container up, wait for full # readiness, emit STACK_SERVICES_READY, then wait indefinitely for the # host build script to capture VM state over QMP (stop + migrate + quit). # The VM never shuts itself down in this path — the host tears it down # once the savevm file has been written. - if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then - log_provision "snapshot-build mode: starting stack.service" + # + # CI never sets STACK_EMULATOR_CAPTURE_SAVEVM=1 (snapshots aren't + # portable across accelerators, so they're captured locally on first + # `stack emulator pull`). This branch only fires for opt-in local + # builds run with EMULATOR_CAPTURE_SAVEVM=1. 
+ if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_CAPTURE_SAVEVM=1' /etc/stack-build.env 2>/dev/null; then + log_provision "capture mode: starting stack.service" systemctl start stack.service || true log_provision "waiting for backend + dashboard to be ready" From fbd32073005d01b9433591ba63b4c44706b82700 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 11:17:33 -0700 Subject: [PATCH 24/25] seed: bump session activity events tx timeout to 30s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prisma's default interactive-transaction timeout is 5s, but under cross-arch arm64 TCG in the emulator qcow2 build this single batch (deleteMany + createMany for events + ipInfos) takes ~10s. Bump just this call to 30s. Production (KVM/native) runs it in <1s, so the looser bound only engages when the DB is genuinely slow. Per-call option — no other transaction is affected. --- apps/backend/src/lib/seed-dummy-data.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/backend/src/lib/seed-dummy-data.ts b/apps/backend/src/lib/seed-dummy-data.ts index 346724680f..c784d6e439 100644 --- a/apps/backend/src/lib/seed-dummy-data.ts +++ b/apps/backend/src/lib/seed-dummy-data.ts @@ -1485,6 +1485,12 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO await tx.event.createMany({ data: events, }); + }, { + // Under cross-arch arm64 TCG in the emulator qcow2 build, this batch + // takes ~10s; Prisma's default is 5s. Production (KVM/native) runs it + // in well under 1s, so the looser bound only kicks in when the DB is + // genuinely slow. 
+ timeout: 30_000, }); if (clickhouseClient && clickhouseRows.length > 0) { From c8630c6e6b06b540411fa9c5baf87548c40881bf Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 11:54:30 -0700 Subject: [PATCH 25/25] =?UTF-8?q?emulator:=20bump=20Postgres=20statement?= =?UTF-8?q?=5Ftimeout=2030s=20=E2=86=92=20120s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runBulldozerPaymentsInit / paginatedIngress issues a single $executeRaw per row that writes a JSONB payload. Under cross-arch arm64 TCG in the qcow2 build it takes ~31s per row and Postgres kills it with code 57014 (canceling statement due to statement timeout). 120s covers the observed time with a ~4× safety margin. --- docker/local-emulator/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/local-emulator/supervisord.conf b/docker/local-emulator/supervisord.conf index 6ceffee211..a9d3d51da8 100644 --- a/docker/local-emulator/supervisord.conf +++ b/docker/local-emulator/supervisord.conf @@ -25,7 +25,7 @@ command=/usr/lib/postgresql/16/bin/postgres -c max_connections=500 -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all - -c statement_timeout=30s + -c statement_timeout=120s user=postgres autostart=true autorestart=true