From 4cc1676426ef7a8c9f15556508ccea6c3925c201 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Tue, 14 Apr 2026 19:19:20 -0700 Subject: [PATCH 01/25] emulator pull progress --- packages/stack-cli/src/commands/emulator.ts | 95 +++++++++++++++++++-- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index d52463b15b..9544b0c890 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,8 +1,10 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; -import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; +import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; +import { Readable } from "stream"; +import { pipeline } from "stream/promises"; import { fileURLToPath } from "url"; import { CliError } from "../lib/errors.js"; @@ -145,7 +147,7 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`); if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); - pullRelease(arch); + await pullRelease(arch); } await runEmulator("start", { EMULATOR_ARCH: arch }); } @@ -156,7 +158,7 @@ function resolveArch(raw?: string): "arm64" | "amd64" { throw new CliError(`Invalid architecture: ${raw ?? process.arch}. Expected arm64 or amd64.`); } -function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: string; tag?: string } = {}) { +async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) { const repo = opts.repo ?? "stack-auth/stack-auth"; const branch = opts.branch ?? "dev"; const tag = opts.tag ?? 
`emulator-${branch}-latest`; @@ -168,15 +170,98 @@ function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: st console.log(`Pulling ${asset} from release ${tag}...`); try { - execFileSync("gh", ["release", "download", tag, "--repo", repo, "--pattern", asset, "--output", tmpDest, "--clobber"], { stdio: "inherit" }); + const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { + assets: { name: string, apiUrl: string, size: number }[], + }; + const match = assets.assets.find((a) => a.name === asset); + if (!match) { + throw new CliError(`Asset ${asset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`); + } + const token = gh(["auth", "token"]); + await downloadWithProgress(match.apiUrl, { + Authorization: `Bearer ${token}`, + Accept: "application/octet-stream", + }, tmpDest, match.size); } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); + if (err instanceof CliError) throw err; throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`); } renameSync(tmpDest, dest); console.log(`Downloaded: ${dest}`); } +async function downloadWithProgress(url: string, headers: Record<string, string>, dest: string, totalBytes?: number): Promise<void> { + const res = await fetch(url, { headers, redirect: "follow" }); + if (!res.ok || !res.body) { + throw new CliError(`Download failed (${res.status} ${res.statusText}): ${url}`); + } + const total = totalBytes ?? 
(Number(res.headers.get("content-length")) || 0); + const isTty = Boolean(process.stderr.isTTY); + const startedAt = Date.now(); + let downloaded = 0; + let lastRender = 0; + + const render = (final: boolean) => { + const now = Date.now(); + if (!final && now - lastRender < 100) return; + lastRender = now; + const elapsed = Math.max(0.001, (now - startedAt) / 1000); + const speed = downloaded / elapsed; + const line = renderProgressLine(downloaded, total, speed); + if (isTty) { + process.stderr.write(`\r\x1b[2K${line}`); + } else if (final) { + process.stderr.write(`${line}\n`); + } + }; + + const body = Readable.fromWeb(res.body as Parameters<typeof Readable.fromWeb>[0]); + body.on("data", (chunk: Buffer) => { + downloaded += chunk.byteLength; + render(false); + }); + await pipeline(body, createWriteStream(dest)); + render(true); + if (isTty) process.stderr.write("\n"); +} + +function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { + const barWidth = 30; + const pct = total > 0 ? Math.min(100, (downloaded / total) * 100) : 0; + const filled = total > 0 ? Math.round((downloaded / total) * barWidth) : 0; + const bar = "█".repeat(filled) + "░".repeat(Math.max(0, barWidth - filled)); + const pctStr = total > 0 ? `${pct.toFixed(1).padStart(5)}%` : " ? "; + const sizeStr = total > 0 ? `${formatBytes(downloaded)}/${formatBytes(total)}` : formatBytes(downloaded); + const speedStr = `${formatBytes(bytesPerSec)}/s`; + const etaStr = total > 0 && bytesPerSec > 0 ? ` eta ${formatDuration((total - downloaded) / bytesPerSec)}` : ""; + return ` [${bar}] ${pctStr} ${sizeStr} ${speedStr}${etaStr}`; +} + +function formatBytes(bytes: number): string { + if (!Number.isFinite(bytes) || bytes < 0) return "?"; + const units = ["B", "KB", "MB", "GB", "TB"]; + let v = bytes; + let i = 0; + while (v >= 1024 && i < units.length - 1) { + v /= 1024; + i++; + } + return `${v.toFixed(v < 10 && i > 0 ? 
1 : 0)} ${units[i]}`; +} + +function formatDuration(seconds: number): string { + if (!Number.isFinite(seconds) || seconds < 0) return "?"; + const s = Math.round(seconds); + if (s < 60) return `${s}s`; + const m = Math.floor(s / 60); + const rs = s % 60; + if (m < 60) return `${m}m${rs.toString().padStart(2, "0")}s`; + const h = Math.floor(m / 60); + const rm = m % 60; + return `${h}h${rm.toString().padStart(2, "0")}m`; +} + export function registerEmulatorCommand(program: Command) { const emulator = program.command("emulator").description("Manage the QEMU local emulator"); @@ -216,7 +301,7 @@ export function registerEmulatorCommand(program: Command) { if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); } else { - pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); + await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } }); From a65022b8f745afd175bf0473868fdfb9b68470a8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 11:49:52 -0700 Subject: [PATCH 02/25] emulator fast-start via VM snapshot + live secret rotation Ships a compressed RAM/device snapshot (stack-emulator-.savevm.zst) alongside the qcow2. `emulator start` resumes from it and rotates the per-install secrets in place, taking cold-boot from 30-120s to ~6-7s. Build phase adds a STACKCFG runtime ISO so stack.service can boot during image creation, starts qemu-guest-agent so its virtio-serial port stays open in the snapshot, then stop+migrate file:+quit via QMP. Runtime sends fresh secrets through QGA guest-exec input-data, which pipes them to trigger-fast-rotate and rotate-secrets inside the container: targeted sed on the placeholder PCK in built JS, UPDATE on the internal ApiKeySet, supervisorctl restart stack-app + cron-jobs. Placeholder hex values are baked in instead of random keys under STACK_EMULATOR_BUILD_SNAPSHOT=1 so no real secret ships in the snapshot. 
Device topology and SMP must match at capture and resume; runtime adds phantom seed/bundle drives and pins SMP=4. Cold-boot fallback kicks in automatically when the snapshot is missing, corrupt, or incompatible. supervisord.conf now uses stopasgroup/killasgroup for stack-app and cron-jobs so supervisor restart actually kills the Node children (they were keeping their port bindings and breaking rotation). --- .gitignore | 3 + docker/local-emulator/Dockerfile | 3 +- docker/local-emulator/entrypoint.sh | 8 +- docker/local-emulator/qemu/build-image.sh | 264 +++++++++++- .../qemu/cloud-init/emulator/user-data | 170 +++++++- docker/local-emulator/qemu/run-emulator.sh | 375 ++++++++++++++++-- docker/local-emulator/rotate-secrets.sh | 104 +++++ docker/local-emulator/run-cron-jobs.sh | 8 + docker/local-emulator/supervisord.conf | 21 + docker/server/entrypoint.sh | 87 ++-- packages/stack-cli/src/commands/emulator.ts | 55 ++- 11 files changed, 987 insertions(+), 111 deletions(-) create mode 100644 docker/local-emulator/rotate-secrets.sh diff --git a/.gitignore b/.gitignore index eab9ce0b48..8fa5c69404 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,6 @@ packages/stack/* !packages/react/package.json !packages/next/package.json !packages/stack/package.json + +# claude code +.claude/scheduled_tasks.lock diff --git a/docker/local-emulator/Dockerfile b/docker/local-emulator/Dockerfile index 56deae7882..138270b405 100644 --- a/docker/local-emulator/Dockerfile +++ b/docker/local-emulator/Dockerfile @@ -262,10 +262,11 @@ COPY docker/local-emulator/run-cron-jobs.sh /run-cron-jobs.sh COPY docker/local-emulator/entrypoint.sh /entrypoint.sh COPY docker/local-emulator/init-services.sh /init-services.sh COPY docker/local-emulator/start-app.sh /start-app.sh +COPY docker/local-emulator/rotate-secrets.sh /usr/local/bin/rotate-secrets COPY docker/local-emulator/clickhouse-config.xml /etc/clickhouse-server/config.xml COPY docker/local-emulator/clickhouse-users.xml 
/etc/clickhouse-server/users.xml COPY docker/server/entrypoint.sh /app-entrypoint.sh -RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh +RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh /usr/local/bin/rotate-secrets # PostgreSQL: 5432, Redis: 6379, Inbucket: 2500/9001/1100, # Svix: 8071, ClickHouse: 8123/9009, MinIO: 9090, QStash: 8080 diff --git a/docker/local-emulator/entrypoint.sh b/docker/local-emulator/entrypoint.sh index 562cb67955..58157c7914 100644 --- a/docker/local-emulator/entrypoint.sh +++ b/docker/local-emulator/entrypoint.sh @@ -33,6 +33,12 @@ fi # baked-in mock value from .env.development to be a usable credential against # a running emulator. Overriding here propagates to both the backend and the # run-cron-jobs.sh loop via supervisord's inherited environment. -export CRON_SECRET="$(openssl rand -hex 32)" +# +# In snapshot-build mode the VM supplies a deterministic placeholder via the +# --env-file so the baked snapshot doesn't contain a real secret; on resume, +# /usr/local/bin/rotate-secrets swaps in a fresh per-install value. +if [ -z "${CRON_SECRET:-}" ]; then + export CRON_SECRET="$(openssl rand -hex 32)" +fi exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index f4d91771b7..0babc5e2b1 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -12,9 +12,22 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" DEBIAN_VERSION="${DEBIAN_VERSION:-13}" DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}" RAM="${EMULATOR_BUILD_RAM:-4096}" -CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" +# Snapshot mode pins SMP to a fixed value so the runtime QEMU command (which +# uses EMULATOR_CPUS, default 4) can match the source device topology — RAM +# migration replay requires identical vCPU count. +if [ "${EMULATOR_BUILD_SNAPSHOT:-1}" = "1" ]; then + CPUS="${EMULATOR_BUILD_CPUS:-4}" +else + CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" +fi PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" +# Snapshot build mode: bring the VM to a fully-warm state (backend + dashboard +# responding), then capture RAM/device state via QMP so that `emulator start` +# can -incoming from it and return in ~3-8s. Enabled by default; set +# EMULATOR_BUILD_SNAPSHOT=0 to fall back to the legacy "shutdown after +# provisioning" flow. +EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" RED='\033[0;31m' GREEN='\033[0;32m' @@ -51,6 +64,12 @@ check_deps() { command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + for cmd in socat zstd; do + command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") + done + fi + if ! command -v mkisofs >/dev/null 2>&1 && ! command -v genisoimage >/dev/null 2>&1 && ! command -v hdiutil >/dev/null 2>&1; then missing+=("mkisofs/genisoimage/hdiutil") fi @@ -231,6 +250,116 @@ persist_provision_logs() { cp "$provision_log" "$IMAGE_DIR/provision-emulator-${arch}.progress.log" 2>/dev/null || true } +# Open a persistent QMP session on the monitor socket, negotiate capabilities, +# run a series of commands, and close. Commands are read from stdin (one JSON +# object per line); responses are written to stdout. 
Uses socat's bidirectional +# pipe so we can interleave request/response in one connection — QMP requires +# qmp_capabilities to come first and keeps state across commands. +qmp_session() { + local sock="$1" + socat -t30 - "UNIX-CONNECT:${sock}" +} + +# Drive the snapshot capture over QMP: +# 1. qmp_capabilities — exit negotiation mode. +# 2. stop — pause the VM so no more disk writes happen. +# 3. migrate to exec:zstd > — streams RAM/device state out. +# 4. Poll query-migrate until status=completed (or failed). +# 5. quit — terminate QEMU cleanly. +capture_vm_state() { + local sock="$1" + local guest_path="$2" + + if [ ! -S "$sock" ]; then + err "QMP monitor socket missing: $sock" + return 1 + fi + + log " QMP: stopping VM..." + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"stop"}' + } | qmp_session "$sock" >/dev/null || { + err "QMP stop failed" + return 1 + } + + log " QMP: migrating RAM state to ${guest_path}..." + # Use file: migration (native QEMU) instead of exec: to avoid relying on a + # spawned shell finding zstd in PATH. We compress as a separate host step + # after migrate completes. + local migrate_cmd + migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") + local migrate_resp + migrate_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$migrate_cmd" + } | qmp_session "$sock") || { + err "QMP migrate failed" + return 1 + } + if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP migrate returned error: $migrate_resp" + return 1 + fi + + # Poll migration status. Migration runs in the background after the + # migrate command returns; we watch for "completed" or "failed". 
+ local migrate_timeout=600 + local waited=0 + local last_heartbeat=0 + while [ "$waited" -lt "$migrate_timeout" ]; do + local status_line status + status_line=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' + } | qmp_session "$sock" 2>/dev/null || true) + status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')" + case "$status" in + completed) + log " QMP: migrate completed (${waited}s)" + break + ;; + failed|cancelled) + err " QMP: migrate ended with status=$status" + err " QMP response: $status_line" + return 1 + ;; + active|setup|device|"") + # still running + if [ "$((waited - last_heartbeat))" -ge 30 ]; then + local transferred + transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})" + last_heartbeat=$waited + fi + ;; + *) + log " QMP: migrate status=$status (${waited}s)" + ;; + esac + sleep 2 + waited=$((waited + 2)) + done + + if [ "$waited" -ge "$migrate_timeout" ]; then + err "QMP migrate timed out after ${migrate_timeout}s" + err "Last query-migrate response: $({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' + } | qmp_session "$sock" 2>/dev/null || true)" + return 1 + fi + + log " QMP: quitting VM..." 
+ { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"quit"}' + } | qmp_session "$sock" >/dev/null || true + + return 0 +} + build_one() { local arch="$1" local base_img="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2" @@ -245,7 +374,9 @@ build_one() { local tmp_img="$tmp_dir/disk.qcow2" local seed_iso="$tmp_dir/seed.iso" local bundle_iso="$tmp_dir/bundle.iso" + local runtime_iso="$tmp_dir/runtime.iso" local bundle_dir="$tmp_dir/bundle" + local runtime_cfg_dir="$tmp_dir/runtime" local serial_log="$tmp_dir/serial.log" local provision_log="$tmp_dir/provision.log" local pidfile="$tmp_dir/qemu.pid" @@ -269,16 +400,64 @@ build_one() { mkdir -p "$bundle_dir" cp "$bundle_tgz" "$bundle_dir/img.tgz" cp "$BUILD_ENV_FILE" "$bundle_dir/build.env" + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # Guest reads this flag to use placeholder secrets and to wait at the end + # of provision-build for the host to snapshot the RAM state. + printf 'STACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env" + fi # Tell the guest which arch it's being built for so cross-arch (TCG) builds # can skip the smoke test, which isn't reliable under software emulation. printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env" make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir" + # render-stack-env (inside the guest) mounts a STACKCFG disk containing + # runtime.env + base.env. At runtime the host-side run-emulator.sh builds + # this ISO; at build time stack.service also starts the container, so we + # must provide the same shape here. Values mirror the defaults the runtime + # would supply — port-prefix 81 and matching host-port numbers (unused at + # build time since nothing is port-forwarded, but render-stack-env embeds + # them into /run/stack-auth/local-emulator.env). 
+ mkdir -p "$runtime_cfg_dir" + { + printf 'STACK_EMULATOR_PORT_PREFIX=81\n' + printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' + printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' + printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' + printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' + printf 'STACK_EMULATOR_VM_DIR_HOST=\n' + } > "$runtime_cfg_dir/runtime.env" + cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" + make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + : > "$serial_log" : > "$provision_log" qemu_base="$(qemu_cmd_prefix_for_arch "$arch")" log "QEMU command prefix (${arch}): $qemu_base" + local monitor_sock="$tmp_dir/monitor.sock" + local qga_sock="$tmp_dir/qga.sock" + local snapshot_args=() + local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec + # inside the guest post-resume (only needed at runtime but harmless here). + # STACKCFG runtime ISO lets stack.service start during the build — same + # disk shape render-stack-env expects at runtime. + snapshot_args=( + -chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + ) + # QEMU disallows migration when virtfs is mounted in the guest — virtfs + # has guest-side state (open handles, mount table) that isn't migratable. + # Drop the host fs mount in snapshot mode; STACK_SERVICES_READY still + # arrives on the serial log so contains_provision_marker can detect it. 
+ virtfs_args=() + fi + # shellcheck disable=SC2086 $qemu_base \ -boot order=c \ @@ -289,16 +468,21 @@ build_one() { -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ - -virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none" \ + "${virtfs_args[@]}" \ + "${snapshot_args[@]}" \ -serial "file:$serial_log" \ -display none \ -daemonize \ -pidfile "$pidfile" pid="$(cat "$pidfile")" + local ready_marker="STACK_CLOUD_INIT_DONE" + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + ready_marker="STACK_SERVICES_READY" + fi elapsed=0 while [ "$elapsed" -lt "$PROVISION_TIMEOUT" ]; do - if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then + if contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then break fi @@ -312,7 +496,7 @@ build_one() { if [ "$total_build_lines" -gt "$last_build_lines" ]; then echo "" sed -n "$((last_build_lines + 1)),${total_build_lines}p" "$provision_log" 2>/dev/null | while IFS= read -r msg; do - if [ "$msg" = "STACK_CLOUD_INIT_DONE" ]; then + if [ "$msg" = "STACK_CLOUD_INIT_DONE" ] || [ "$msg" = "STACK_SERVICES_READY" ]; then continue fi printf " [%3ds] %s\n" "$elapsed" "$msg" @@ -332,7 +516,7 @@ build_one() { done echo "" - if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then + if ! 
contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then if [ "$guest_failed" = true ]; then err "Guest provisioning reported failure for emulator (${arch})" elif [ "$guest_exited" = true ]; then @@ -358,17 +542,67 @@ build_one() { exit 1 fi - local shutdown_wait=0 - while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do - sleep 1 - shutdown_wait=$((shutdown_wait + 1)) - done + if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst" + local savevm_raw="$tmp_dir/state.raw" + local savevm_tmp="$tmp_dir/state.zst" + + # Capture raw RAM/device state via QEMU's native file: migration; then + # compress on the host side. Avoids any reliance on QEMU spawning a shell + # that has zstd in PATH. + log "Capturing VM state via QMP (${arch})..." + if ! capture_vm_state "$monitor_sock" "$savevm_raw"; then + err "Failed to capture VM state for ${arch}" + if kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + sleep 1 + kill -9 "$pid" 2>/dev/null || true + fi + persist_provision_logs "$arch" "$serial_log" "$provision_log" + rm -rf "$tmp_dir" + exit 1 + fi - if kill -0 "$pid" 2>/dev/null; then - warn "Guest did not power off cleanly; forcing shutdown." - kill "$pid" 2>/dev/null || true - sleep 2 - kill -9 "$pid" 2>/dev/null || true + # QEMU exited cleanly via `quit`. Wait briefly to release the pid file. + local shutdown_wait=0 + while [ "$shutdown_wait" -lt 30 ] && kill -0 "$pid" 2>/dev/null; do + sleep 1 + shutdown_wait=$((shutdown_wait + 1)) + done + if kill -0 "$pid" 2>/dev/null; then + warn "QEMU did not exit after quit; forcing." + kill "$pid" 2>/dev/null || true + sleep 2 + kill -9 "$pid" 2>/dev/null || true + fi + + if [ ! -s "$savevm_raw" ]; then + err "VM state file missing or empty at $savevm_raw" + persist_provision_logs "$arch" "$serial_log" "$provision_log" + rm -rf "$tmp_dir" + exit 1 + fi + + log "Compressing VM state with zstd..." 
+ zstd -3 -T0 --rm -o "$savevm_tmp" "$savevm_raw" + + mv "$savevm_tmp" "$savevm_file" + local savevm_size + savevm_size="$(du -h "$savevm_file" | cut -f1)" + log "Saved VM state: $savevm_file (${savevm_size})" + else + local shutdown_wait=0 + while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do + sleep 1 + shutdown_wait=$((shutdown_wait + 1)) + done + + if kill -0 "$pid" 2>/dev/null; then + warn "Guest did not power off cleanly; forcing shutdown." + kill "$pid" 2>/dev/null || true + sleep 2 + kill -9 "$pid" 2>/dev/null || true + fi fi persist_provision_logs "$arch" "$serial_log" "$provision_log" diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index 38fe2b0646..8a968bf079 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -75,12 +75,24 @@ write_files: # ssk/sak: required by the emulator's own dashboard (StackServerApp # construction throws without them). Not used by user-app flows; the # /local-emulator/project route mints separate per-project credentials. + # + # Snapshot-build mode (STACK_EMULATOR_BUILD_SNAPSHOT=1 in /etc/stack-build.env): + # use deterministic placeholder hex strings instead of random values. The + # built image then contains these placeholders; at every `emulator start` + # resume the host generates fresh per-install secrets and + # /usr/local/bin/rotate-secrets (inside the stack container) swaps them in. umask 077 - for key in internal-pck internal-ssk internal-sak; do - if [ ! 
-s "/var/lib/stack-auth/$key" ]; then - openssl rand -hex 32 > "/var/lib/stack-auth/$key" - fi - done + if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + printf '%s' '00000000000000000000000000000000ffffffffffffffffffffffffffffffff' > /var/lib/stack-auth/internal-pck + printf '%s' '00000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' > /var/lib/stack-auth/internal-ssk + printf '%s' '00000000000000000000000000000000dddddddddddddddddddddddddddddddd' > /var/lib/stack-auth/internal-sak + else + for key in internal-pck internal-ssk internal-sak; do + if [ ! -s "/var/lib/stack-auth/$key" ]; then + openssl rand -hex 32 > "/var/lib/stack-auth/$key" + fi + done + fi INTERNAL_PCK="$(cat /var/lib/stack-auth/internal-pck)" INTERNAL_SSK="$(cat /var/lib/stack-auth/internal-ssk)" INTERNAL_SAK="$(cat /var/lib/stack-auth/internal-sak)" @@ -92,6 +104,15 @@ write_files: HOST_SERVICES_HOST=10.0.2.2 P="$STACK_EMULATOR_PORT_PREFIX" + # Snapshot-build mode: ship a deterministic placeholder CRON_SECRET so the + # baked VM contains a known-public value that rotate-secrets swaps out on + # every resume. Outside snapshot-build mode, leave CRON_SECRET unset so + # docker/local-emulator/entrypoint.sh generates a fresh random one. + EMULATOR_CRON_SECRET="" + if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + EMULATOR_CRON_SECRET="00000000000000000000000000000000cccccccccccccccccccccccccccccccc" + fi + { # Static vars from base config and runtime (e.g. 
API keys, feature flags) cat /mnt/stack-runtime/base.env @@ -99,6 +120,9 @@ write_files: printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$INTERNAL_PCK" printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$INTERNAL_SSK" printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$INTERNAL_SAK" + if [ -n "$EMULATOR_CRON_SECRET" ]; then + printf 'CRON_SECRET=%s\n' "$EMULATOR_CRON_SECRET" + fi # Computed vars — depend on port prefix or deps host # Host-side ports (for browser URLs — browser runs on host, not in VM) @@ -142,14 +166,20 @@ write_files: permissions: '0755' content: | #!/bin/bash - set -euo pipefail + set -uo pipefail mkdir -p /host - if ! mountpoint -q /host; then - if ! mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then - echo "Failed to mount host filesystem at /host" >&2 - exit 1 - fi + if mountpoint -q /host; then + exit 0 + fi + # In snapshot-build mode the host detaches virtfs (QEMU disallows + # migration while it's mounted), and at runtime we re-attach it. Tolerate + # both states: try to mount, fall through to an empty /host if no + # virtio-9p channel is available. + if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then + exit 0 fi + echo "host filesystem unavailable; continuing with empty /host" >&2 + exit 0 - path: /usr/local/bin/run-stack-container permissions: '0755' @@ -522,6 +552,74 @@ write_files: fstrim -av 2>/dev/null || true log "slim-docker-image done." + - path: /usr/local/bin/wait-for-stack-ready + permissions: '0755' + content: | + #!/bin/bash + # Poll the stack container's backend + dashboard on the guest's own + # localhost until both respond healthy. Used at snapshot-build time to + # gate "emit STACK_SERVICES_READY" on the app actually being warm. 
+ set -uo pipefail + + TIMEOUT="${STACK_READY_TIMEOUT:-600}" + BACKEND_PORT="${STACK_READY_BACKEND_PORT:-8102}" + DASHBOARD_PORT="${STACK_READY_DASHBOARD_PORT:-8101}" + + log() { /usr/local/bin/log-provision "wait-for-stack-ready: $*"; } + + start=$SECONDS + next_heartbeat=$((start + 30)) + log "waiting for backend:$BACKEND_PORT and dashboard:$DASHBOARD_PORT (timeout=${TIMEOUT}s)" + while true; do + backend_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${BACKEND_PORT}/health?db=1" 2>/dev/null || true) + dashboard_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${DASHBOARD_PORT}/handler/sign-in" 2>/dev/null || true) + if [ "$backend_code" = "200" ] && [ "$dashboard_code" = "200" ]; then + log "ready ($((SECONDS - start))s)" + exit 0 + fi + if [ "$SECONDS" -ge "$next_heartbeat" ]; then + log "still waiting (backend=$backend_code dashboard=$dashboard_code, $((SECONDS - start))s elapsed)" + next_heartbeat=$((SECONDS + 30)) + fi + if [ "$((SECONDS - start))" -ge "$TIMEOUT" ]; then + log "TIMEOUT after $((SECONDS - start))s (backend=$backend_code dashboard=$dashboard_code)" + docker ps -a 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: ps" || true + docker logs --tail 200 stack 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: stack" || true + systemctl status stack.service --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: svc" || true + journalctl -u stack.service --no-pager -n 100 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: jrnl" || true + docker image ls 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: img" || true + exit 1 + fi + sleep 2 + done + + - path: /usr/local/bin/trigger-fast-rotate + permissions: '0755' + content: | + #!/bin/bash + # Called via qemu-guest-agent on every snapshot resume. 
Reads fresh + # secrets from stdin (key=value lines, written by the host via QGA's + # guest-exec input-data) and execs rotate-secrets inside the stack + # container with those values exported. + set -euo pipefail + + tmp="$(mktemp /var/run/stack-fresh-XXXXXX.env)" + cat > "$tmp" + chmod 0600 "$tmp" + + # shellcheck disable=SC1090 + set -a + source "$tmp" + set +a + rm -f "$tmp" + + exec docker exec \ + -e STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \ + -e STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \ + -e STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \ + -e CRON_SECRET \ + stack /usr/local/bin/rotate-secrets + - path: /etc/systemd/system/stack.service content: | [Unit] @@ -591,6 +689,14 @@ write_files: systemctl disable --now ssh || true systemctl mask ssh || true + # qemu-guest-agent: used by the host to inject fresh secrets + trigger + # rotate-secrets after a snapshot resume. Must be running INSIDE the VM + # at snapshot capture time — the virtio-serial port's "open" state is + # part of the migrated device state. If QGA wasn't connected at capture, + # the resumed VM's port stays closed and the host can't reach it. + systemctl enable qemu-guest-agent || true + systemctl start qemu-guest-agent || true + log_provision "installing emulator containers" bash /usr/local/bin/install-emulator-containers @@ -603,6 +709,48 @@ write_files: log_provision "starting slim-docker-image" bash /usr/local/bin/slim-docker-image + # Snapshot-build mode: bring the stack container up, wait for full + # readiness, emit STACK_SERVICES_READY, then wait indefinitely for the + # host build script to capture VM state over QMP (stop + migrate + quit). + # The VM never shuts itself down in this path — the host tears it down + # once the savevm file has been written. 
+ if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then + log_provision "snapshot-build mode: starting stack.service" + systemctl start stack.service || true + + log_provision "waiting for backend + dashboard to be ready" + if ! /usr/local/bin/wait-for-stack-ready; then + log_provision "ERROR: stack services did not become ready" + exit 1 + fi + + # Ensure qemu-guest-agent is running so its virtio-serial port stays + # "open" in the snapshot — the host needs that port at runtime to + # trigger rotate-secrets. + log_provision "ensuring qemu-guest-agent is up" + systemctl restart qemu-guest-agent || true + sleep 2 + if ! systemctl is-active --quiet qemu-guest-agent; then + log_provision "ERROR: qemu-guest-agent failed to start" + systemctl status qemu-guest-agent --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "qga" + exit 1 + fi + log_provision "qemu-guest-agent active" + + log_provision "services ready; signalling STACK_SERVICES_READY" + if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then + printf '%s\n' "STACK_SERVICES_READY" >> "$STACK_PROVISION_LOG_FILE" + fi + write_marker_to_consoles "STACK_SERVICES_READY" + sync || true + + # Clear the EXIT trap so the cleanup path doesn't mark this as failed + # when the host powers us off via QMP quit. + trap - EXIT + # Block forever; host will issue qmp quit after migrate completes. 
+ while true; do sleep 3600; done + fi + log_provision "build pipeline complete" if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then printf '%s\n' "STACK_CLOUD_INIT_DONE" >> "$STACK_PROVISION_LOG_FILE" diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index ba905ca36d..9181e527b3 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -12,6 +12,11 @@ VM_RAM="${EMULATOR_RAM:-4096}" VM_CPUS="${EMULATOR_CPUS:-4}" PORT_PREFIX="${PORT_PREFIX:-${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}}" READY_TIMEOUT="${EMULATOR_READY_TIMEOUT:-240}" +# Shorter timeout when resuming from a snapshot: services are already running, +# we only need to wait for rotate-secrets + Node restart (~3-10s). +SNAPSHOT_READY_TIMEOUT="${EMULATOR_SNAPSHOT_READY_TIMEOUT:-45}" +# Set to 1 to force a cold boot and ignore any shipped savevm file. +EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}" # Fixed host-side ports for the QEMU emulator (267xx range). # Only user-facing services are exposed; internal deps stay inside the VM. @@ -62,10 +67,18 @@ image_path() { echo "$IMAGE_DIR/stack-emulator-$ARCH.qcow2" } +savevm_path() { + echo "$IMAGE_DIR/stack-emulator-$ARCH.savevm.zst" +} + runtime_iso_path() { echo "$VM_DIR/runtime-config.iso" } +snapshot_available() { + [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] +} + # Returns a fast fingerprint (size:mtime) of the base QEMU image. # Used to detect whether the image has changed since the overlay was created. base_image_fingerprint() { @@ -77,6 +90,23 @@ base_image_fingerprint() { esac } +# Fingerprint used to detect stale overlays. Includes both the base qcow2 and +# the savevm file so the overlay is rebuilt whenever either input changes. The +# overlay disk must match the disk state the snapshot was taken against for +# -incoming resume to be consistent. 
+runtime_fingerprint() { + local base="$1" + local savevm="$2" + local base_fp savevm_fp + base_fp="$(base_image_fingerprint "$base")" + if [ -f "$savevm" ]; then + savevm_fp="$(base_image_fingerprint "$savevm")" + else + savevm_fp="no-savevm" + fi + printf '%s|%s\n' "$base_fp" "$savevm_fp" +} + prepare_runtime_config_iso() { local cfg_dir="$VM_DIR/runtime-config" local cfg_iso @@ -154,8 +184,9 @@ wait_for_condition() { } build_qemu_cmd() { - local base_img + local base_img savevm_file base_img="$(image_path)" + savevm_file="$(savevm_path)" if [ ! -f "$base_img" ]; then err "Missing QEMU image: $base_img" @@ -166,18 +197,35 @@ build_qemu_cmd() { mkdir -p "$VM_DIR" local fingerprint_file="$VM_DIR/base-image.fingerprint" local current_fp - current_fp="$(base_image_fingerprint "$base_img")" - if [ -f "$VM_DIR/disk.qcow2" ]; then - if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then - log "Reusing existing overlay disk (changes persist)" - else - warn "QEMU base image has changed — recreating overlay." + current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")" + + if snapshot_available; then + # The savevm RAM state was captured against the base image's exact disk + # state. An overlay with writes from a previous session diverges from + # that point, so -incoming would resume RAM against inconsistent disk. + # Always start from a fresh overlay in the snapshot path; per-session + # state is not preserved. Users who want persistence can opt out with + # EMULATOR_NO_SNAPSHOT=1. + if [ -f "$VM_DIR/disk.qcow2" ]; then rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" fi - fi - if [ ! 
-f "$VM_DIR/disk.qcow2" ]; then qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null - base_image_fingerprint "$base_img" > "$fingerprint_file" + printf '%s' "$current_fp" > "$fingerprint_file" + else + # If the overlay was created against a different base or savevm, it will + # diverge from the snapshot's disk state — force a rebuild. + if [ -f "$VM_DIR/disk.qcow2" ]; then + if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then + log "Reusing existing overlay disk (changes persist)" + else + warn "Base image or snapshot has changed — recreating overlay." + rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" + fi + fi + if [ ! -f "$VM_DIR/disk.qcow2" ]; then + qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null + printf '%s' "$current_fp" > "$fingerprint_file" + fi fi local qemu_bin machine cpu firmware_args=() @@ -213,28 +261,100 @@ build_qemu_cmd() { # are mutually exclusive. netdev+=",hostfwd=tcp:127.0.0.1:${PORT_PREFIX}14-:${PORT_PREFIX}14" - QEMU_CMD=( - "$qemu_bin" - -machine "$machine" - -accel "$ACCEL" - -cpu "$cpu" - "${firmware_args[@]}" - -boot order=c - -m "$VM_RAM" - -smp "$VM_CPUS" - -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" - -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" - -netdev "$netdev" - -device virtio-net-pci,netdev=net0 - -device virtio-balloon-pci - -virtfs "local,path=/,mount_tag=hostfs,security_model=none" - -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" - -mon "chardev=monitor,mode=control" - -serial "file:$VM_DIR/serial.log" - -display none - -daemonize - -pidfile "$VM_DIR/qemu.pid" - ) + # In snapshot-resume mode the QEMU command-line MUST match the device set + # used at snapshot capture time, otherwise migration replay fails (broken + # pipe / device tree mismatch). 
At capture time the build attaches: + # disk(if=virtio) + seed.iso + bundle.iso + runtime.iso (all if=virtio) + # netdev + virtio-net-pci + monitor + QGA virtio-serial + # SMP=4, RAM=4096 (pinned in build-image.sh snapshot mode) + # We mirror that exactly. The seed/bundle ISOs were used by cloud-init at + # build and are not needed at runtime, but their virtio-blk slots must + # exist so the migration replay matches device IDs. Runtime-only devices + # (virtfs, balloon) live at higher slots — extra at destination is fine. + local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" + if snapshot_available; then + log "Snapshot found at $savevm_file — fast-resume enabled." + snapshot_args+=(-incoming "exec:zstd -dc $savevm_file") + snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" + if [ "$snapshot_smp" != "$VM_CPUS" ]; then + log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." + fi + + # Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present + # at snapshot time. Their content doesn't matter (cloud-init has already + # run); only the virtio-blk slot count must match. + local seed_phantom="$VM_DIR/seed.phantom" + local bundle_phantom="$VM_DIR/bundle.phantom" + if [ ! -s "$seed_phantom" ]; then + dd if=/dev/zero of="$seed_phantom" bs=1M count=1 status=none + fi + if [ ! -s "$bundle_phantom" ]; then + dd if=/dev/zero of="$bundle_phantom" bs=1M count=1 status=none + fi + runtime_only_args+=( + -drive "file=$seed_phantom,format=raw,if=virtio,readonly=on" + -drive "file=$bundle_phantom,format=raw,if=virtio,readonly=on" + ) + else + # Cold-boot: include virtio-balloon and virtfs as before. 
+ runtime_only_args+=( + -device virtio-balloon-pci + -virtfs "local,path=/,mount_tag=hostfs,security_model=none" + ) + fi + + if snapshot_available; then + QEMU_CMD=( + "$qemu_bin" + -machine "$machine" + -accel "$ACCEL" + -cpu "$cpu" + "${firmware_args[@]}" + -boot order=c + -m "$VM_RAM" + -smp "$snapshot_smp" + -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" + "${runtime_only_args[@]}" + -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" + -netdev "$netdev" + -device virtio-net-pci,netdev=net0 + -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + "${snapshot_args[@]}" + -serial "file:$VM_DIR/serial.log" + -display none + -daemonize + -pidfile "$VM_DIR/qemu.pid" + ) + else + QEMU_CMD=( + "$qemu_bin" + -machine "$machine" + -accel "$ACCEL" + -cpu "$cpu" + "${firmware_args[@]}" + -boot order=c + -m "$VM_RAM" + -smp "$snapshot_smp" + -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" + -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on" + -netdev "$netdev" + -device virtio-net-pci,netdev=net0 + "${runtime_only_args[@]}" + -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off" + -mon "chardev=monitor,mode=control" + -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" + -device virtio-serial + -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + -serial "file:$VM_DIR/serial.log" + -display none + -daemonize + -pidfile "$VM_DIR/qemu.pid" + ) + fi } @@ -274,6 +394,132 @@ start_vm() { "${QEMU_CMD[@]}" } +# Send one or more QMP commands over the monitor socket. Each line of stdin is +# a JSON object; capabilities are always negotiated first. 
Keep stdin open +# briefly after writing so socat doesn't close before QEMU responds — QMP +# typically replies in milliseconds so 0.3s is enough. +qmp_send() { + if [ ! -S "$VM_DIR/monitor.sock" ]; then + return 1 + fi + local payload + payload="$(cat)" + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$payload" + sleep 0.3 + } | socat -t5 - "UNIX-CONNECT:$VM_DIR/monitor.sock" 2>/dev/null +} + +# After -incoming, QEMU is in "inmigrate" until the entire migration stream has +# been received. Sending `cont` mid-migration would abort it (the host-side +# decompressor / pipe gets killed). Wait for the VM to reach a runnable state +# (paused / postmigrate / prelaunch / running) before continuing. +qmp_wait_for_paused_and_continue() { + local deadline=$((SECONDS + 120)) + while [ "$SECONDS" -lt "$deadline" ]; do + local out status + out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true) + status=$(printf '%s' "$out" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/') + case "$status" in + running) + return 0 + ;; + paused|postmigrate|prelaunch) + printf '%s\n' '{"execute":"cont"}' | qmp_send >/dev/null || true + return 0 + ;; + inmigrate|"") + # still loading migration data + ;; + *) + log "unexpected QMP status: $status" + ;; + esac + sleep 0.2 + done + return 1 +} + +# Generate fresh per-install secrets on the host. We pass them to the guest +# through QGA's guest-exec input-data field (base64-encoded), so no host file +# or virtfs mount is needed in the snapshot path. +generate_fresh_secrets_payload() { + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$(openssl rand -hex 32)" + printf 'CRON_SECRET=%s\n' "$(openssl rand -hex 32)" +} + +# Drive qemu-guest-agent via its virtserialport socket. 
QGA speaks the same +# JSON protocol as QMP but over a separate channel. We use guest-sync to make +# sure the agent is responsive, then guest-exec to fire trigger-fast-rotate. +qga_send() { + if [ ! -S "$VM_DIR/qga.sock" ]; then + return 1 + fi + # socat closes the connection on stdin EOF before QGA can reply, so keep + # stdin open for a short window after writing the request to give the + # agent time to respond. QGA replies in milliseconds; the only reason this + # isn't 0.1s is to absorb scheduling jitter on a busy host. + local payload + payload="$(cat)" + ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t10 - "UNIX-CONNECT:$VM_DIR/qga.sock" 2>/dev/null +} + +qga_wait_ready() { + local deadline=$((SECONDS + 30)) + while [ "$SECONDS" -lt "$deadline" ]; do + local resp + resp=$(printf '%s\n' '{"execute":"guest-sync","arguments":{"id":424242}}' | qga_send || true) + if printf '%s' "$resp" | grep -q '"return":[[:space:]]*424242'; then + return 0 + fi + sleep 0.2 + done + return 1 +} + +qga_trigger_fast_rotate() { + # guest-exec returns a pid; we then poll guest-exec-status until the + # process exits, and surface its exit code. Capture output so a failure + # message is available in serial.log. We pipe the fresh-secrets env file + # (as base64) to the script via input-data — keeps secrets off the + # filesystem and avoids needing virtfs. 
+ local secrets_b64 resp pid + secrets_b64=$(generate_fresh_secrets_payload | base64 | tr -d '\n') + local cmd + cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64") + resp=$(printf '%s\n' "$cmd" | qga_send || true) + pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + if [ -z "$pid" ]; then + err "guest-exec did not return a pid; response: $resp" + return 1 + fi + + # Rotation (sed + UPDATE + supervisorctl restart + node startup) fits well + # inside this window. + local deadline=$((SECONDS + 60)) + while [ "$SECONDS" -lt "$deadline" ]; do + local status_resp exited exitcode + status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true) + exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/') + if [ "$exited" = "true" ]; then + exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/') + if [ "${exitcode:-0}" = "0" ]; then + log "rotate-secrets completed." + return 0 + fi + err "rotate-secrets exited with code ${exitcode:-unknown}" + err "response: $status_resp" + return 1 + fi + sleep 0.2 + done + err "rotate-secrets did not complete within 60s" + return 1 +} + stop_vm() { if [ ! -f "$VM_DIR/qemu.pid" ]; then return 0 @@ -305,18 +551,58 @@ cmd_start() { info "Arch: $ARCH | Accel: $ACCEL" info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT" + local using_snapshot=0 + if snapshot_available; then + using_snapshot=1 + fi + start_vm info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs" - if ! 
wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then - tail_vm_logs - exit 1 - fi + if [ "$using_snapshot" = "1" ]; then + log "Resuming from snapshot..." + if ! qmp_wait_for_paused_and_continue; then + warn "Snapshot resume did not reach a runnable state — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi - if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then - tail_vm_logs - exit 1 + log "VM resumed; waiting for guest agent..." + if ! qga_wait_ready; then + warn "Guest agent did not respond — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + + log "Generating fresh secrets + triggering rotation..." + if ! qga_trigger_fast_rotate; then + warn "Failed to trigger rotate-secrets — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + + # Wait for the *new* backend (post-supervisor-restart) to actually be + # listening. all_ready may briefly return true against the OLD Node + # processes between when supervisor sends SIGTERM and when the children + # die; sleep a beat so we measure the real readiness. + sleep 1 + if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not recover after rotation — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi + else + if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then + tail_vm_logs + exit 1 + fi + + if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then + tail_vm_logs + exit 1 + fi fi log "All services are green." @@ -324,6 +610,17 @@ cmd_start() { info "Backend: http://localhost:${EMULATOR_BACKEND_PORT}" } +# If anything about the snapshot resume fails, stop the VM, wipe the overlay, +# and retry as a cold boot. Keeps the user unblocked even when the snapshot is +# broken (e.g. stale, incompatible host-arch/QEMU-version mismatch). 
+snapshot_fallback_to_cold_boot() { + warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." + stop_vm + rm -rf "$VM_DIR" + EMULATOR_NO_SNAPSHOT=1 + cmd_start +} + cmd_stop() { stop_vm log "QEMU emulator stopped." diff --git a/docker/local-emulator/rotate-secrets.sh b/docker/local-emulator/rotate-secrets.sh new file mode 100644 index 0000000000..d374446426 --- /dev/null +++ b/docker/local-emulator/rotate-secrets.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# Rotate baked-in placeholder secrets with fresh host-generated values. +# +# Called inside the stack container by the emulator snapshot-resume path. +# Host writes fresh secrets to /host/stack-runtime/fresh-secrets.env before +# invoking this script (via `docker exec stack /usr/local/bin/rotate-secrets`). +# +# Flow: +# 1. Read fresh secrets from host-supplied env file. +# 2. Validate they are 64-char hex (the build placeholders are too). +# 3. Write rotated-secrets.env that app-entrypoint and run-cron-jobs source +# on restart. +# 4. Targeted sed across built files: swap the placeholder PCK for the fresh +# one (this is the only secret baked into JS via sentinel replacement at +# build time — SSK/SAK/CRON_SECRET flow through process.env only). +# 5. UPDATE the internal ApiKeySet row in Postgres. +# 6. supervisorctl restart stack-app + cron-jobs so the new values take +# effect in the running Node processes. + +set -euo pipefail + +OUTPUT=/run/stack-auth/rotated-secrets.env +WORK_DIR="${STACK_RUNTIME_WORK_DIR:-/app}" + +PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff" + +log() { printf '[rotate-secrets] %s\n' "$*"; } + +# Fresh secrets arrive via env vars (passed by trigger-fast-rotate using +# `docker exec -e`). For backward compatibility, fall back to a file path if +# STACK_ROTATE_INPUT is set. 
+if [ -n "${STACK_ROTATE_INPUT:-}" ] && [ -f "$STACK_ROTATE_INPUT" ]; then + log "reading fresh secrets from $STACK_ROTATE_INPUT" + set -a + # shellcheck disable=SC1090 + source "$STACK_ROTATE_INPUT" + set +a +fi + +for var in STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \ + STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \ + STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \ + CRON_SECRET; do + val="${!var:-}" + if [ -z "$val" ]; then + log "ERROR: $var is missing from environment" + exit 1 + fi + if ! printf '%s' "$val" | grep -Eq '^[0-9a-fA-F]{64}$'; then + log "ERROR: $var is not a 64-char hex string" + exit 1 + fi +done + +mkdir -p "$(dirname "$OUTPUT")" +umask 077 +{ + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" + printf 'CRON_SECRET=%s\n' "$CRON_SECRET" + # Mirror these so process.env lookups in Node match env after restart. + printf 'NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" + printf 'STACK_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" + printf 'STACK_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" +} > "$OUTPUT" +chmod 0600 "$OUTPUT" +log "wrote $OUTPUT" + +# The PCK is baked into built JS via STACK_ENV_VAR_SENTINEL replacement at +# container start (see /app-entrypoint.sh). Swap the placeholder hex for the +# fresh value across the built tree. Only *.js files need patching; this +# runs in ~1s on the standalone Next.js bundles. 
+if [ "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" != "$PLACEHOLDER_PCK" ]; then + log "rewriting PCK placeholder in $WORK_DIR" + # grep -rl narrows the find to only files that contain the placeholder, so + # the follow-up sed doesn't walk the whole tree. + mapfile -t files < <(grep -rl --include='*.js' "$PLACEHOLDER_PCK" "$WORK_DIR/apps" 2>/dev/null || true) + if [ "${#files[@]}" -gt 0 ]; then + sed -i "s|${PLACEHOLDER_PCK}|${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}|g" "${files[@]}" + log "patched ${#files[@]} file(s)" + else + log "no files contained the placeholder (already rotated?)" + fi +fi + +# Update the internal ApiKeySet row so existing dashboard sessions keep +# working with the new keys. Values are already validated as hex above, so +# inlining is safe. +if [ -n "${STACK_DATABASE_CONNECTION_STRING:-}" ]; then + log "updating internal ApiKeySet" + psql "$STACK_DATABASE_CONNECTION_STRING" -v ON_ERROR_STOP=1 < a.name === diskAsset); + if (!diskMatch) { + throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); + } + const snapshotMatch = assets.assets.find((a) => a.name === snapshotAsset); + const token = gh(["auth", "token"]); + + await downloadAsset(diskMatch, imageDir, diskAsset, token, tag); + if (snapshotMatch) { + await downloadAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); + } else { + console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); + } +} + +async function downloadAsset( + match: { name: string, apiUrl: string, size: number }, + imageDir: string, + asset: string, + token: string, + tag: string, +): Promise { const dest = join(imageDir, asset); const tmpDest = `${dest}.download`; - console.log(`Pulling ${asset} from release ${tag}...`); try { - const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { - assets: { name: string, apiUrl: string, size: number }[], - }; - const match = assets.assets.find((a) => a.name === asset); - if (!match) { - throw new CliError(`Asset ${asset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`); - } - const token = gh(["auth", "token"]); await downloadWithProgress(match.apiUrl, { Authorization: `Bearer ${token}`, Accept: "application/octet-stream", @@ -185,7 +207,7 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); if (err instanceof CliError) throw err; - throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`); + throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? 
err.message : err}`); } renameSync(tmpDest, dest); console.log(`Downloaded: ${dest}`); @@ -291,7 +313,9 @@ export function registerEmulatorCommand(program: Command) { const imageDir = emulatorImageDir(); mkdirSync(imageDir, { recursive: true }); const dest = join(imageDir, `stack-emulator-${arch}.qcow2`); + const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); if (existsSync(dest)) unlinkSync(dest); + if (existsSync(snapshotDest)) unlinkSync(snapshotDest); console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`); try { execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" }); @@ -300,6 +324,15 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); + // Snapshot artifact is optional — older CI builds may not produce it. + try { + execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}-savevm`, "--dir", imageDir], { stdio: "pipe" }); + if (existsSync(snapshotDest)) { + console.log(`Downloaded: ${snapshotDest}`); + } + } catch { + console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); + } } else { await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } From 30dbdffc4a77b0e2ee2b327517a28b53b5824eb1 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:04:15 -0700 Subject: [PATCH 03/25] faster snapshot resume via mapped-ram + rotation opt-out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Snapshot resume drops from ~14s to ~5-7s with rotation, ~2.5s without. Build uses QEMU's mapped-ram + multifd migration capability so the RAM state is written at page-aligned offsets in a sparse file. 
Runtime decompresses the shipped .savevm.zst once to a local .raw cache and reloads via -incoming file: + migrate-incoming on subsequent starts, avoiding the per-start zstd decode. Adds EMULATOR_NO_ROTATION=1 for tests/CI that don't mind the placeholder secrets; saves the full ~3s rotation window. Misc runtime cleanups: tighter QMP/QGA poll intervals (1s → 0.2s), shorter socat keep-alive windows, 1s settle before the post-rotation health-check to avoid racing old Node processes, fallback path preserves the CLI-generated runtime-config.iso instead of blowing away VM_DIR. Build-time qmp_session keeps stdin open briefly after the caller's commands so migrate-set-capabilities is actually processed before socat closes — without this, mapped-ram was silently a no-op. CI workflow publishes .savevm.zst alongside the .qcow2 (optional asset; CLI falls back to cold boot when missing). Test + verify steps go through the CLI now that ISO generation is owned by packages/stack-cli. --- .github/workflows/qemu-emulator-build.yaml | 127 ++++-- docker/local-emulator/qemu/build-image.sh | 35 +- docker/local-emulator/qemu/run-emulator.sh | 165 +++++-- packages/stack-cli/package.json | 4 +- .../stack-cli/src/commands/emulator.test.ts | 166 ++++++++ packages/stack-cli/src/commands/emulator.ts | 337 ++++++++++++--- packages/stack-cli/src/lib/iso.test.ts | 259 +++++++++++ packages/stack-cli/src/lib/iso.ts | 403 ++++++++++++++++++ packages/stack-cli/vitest.config.ts | 19 + pnpm-lock.yaml | 241 +++-------- 10 files changed, 1444 insertions(+), 312 deletions(-) create mode 100644 packages/stack-cli/src/commands/emulator.test.ts create mode 100644 packages/stack-cli/src/lib/iso.test.ts create mode 100644 packages/stack-cli/src/lib/iso.ts create mode 100644 packages/stack-cli/vitest.config.ts diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 5df1497465..4bb738124d 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ 
b/.github/workflows/qemu-emulator-build.yaml @@ -55,10 +55,21 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + - uses: pnpm/action-setup@v4 + if: matrix.arch == 'amd64' + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + if: matrix.arch == 'amd64' + with: + node-version: 22 + cache: pnpm + - name: Install QEMU dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 + sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 zstd - name: Enable KVM access run: | @@ -82,41 +93,61 @@ jobs: - name: Generate emulator env run: node docker/local-emulator/generate-env-development.mjs - # arm64 runs under cross-arch TCG on an amd64 runner; the backend's - # V8 TurboFan JIT re-triggers the SIGTRAPs we dodge in migrations - # with --no-opt, and even if it didn't, boot is too slow under TCG - # to verify in any sane window. amd64 KVM already exercises the - # service stack; real arm64 hosts have KVM for end-users. - - name: Start emulator and verify + # amd64 runs under KVM on the runner so we can boot the newly-built + # image to verify it works end-to-end before publishing. arm64 runs + # under cross-arch TCG on an amd64 host, which can't reliably boot + # Next.js within any sane window — skipped. + - name: Build stack-cli (for emulator CLI) if: matrix.arch == 'amd64' run: | - chmod +x docker/local-emulator/qemu/run-emulator.sh - EMULATOR_ARCH=${{ matrix.arch }} \ - EMULATOR_READY_TIMEOUT=3200 \ - docker/local-emulator/qemu/run-emulator.sh start + pnpm install --frozen-lockfile --filter @stackframe/stack-cli... 
+ pnpm --filter @stackframe/stack-cli run build + + - name: Start emulator and verify + if: matrix.arch == 'amd64' + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_READY_TIMEOUT: 3200 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy if: matrix.arch == 'amd64' - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh status + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator status - name: Stop emulator if: always() && matrix.arch == 'amd64' - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh stop + env: + EMULATOR_ARCH: ${{ matrix.arch }} + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator stop - name: Package image run: | BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2" + SAVEVM="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.savevm.zst" cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2" + if [ -f "$SAVEVM" ]; then + cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" + ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" + else + echo "NOTE: no savevm snapshot was produced; fast-start will be unavailable for this arch." 
+ fi - name: Upload image artifact uses: actions/upload-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: stack-emulator-${{ matrix.arch }}.qcow2 + path: | + stack-emulator-${{ matrix.arch }}.qcow2 + stack-emulator-${{ matrix.arch }}.savevm.zst + if-no-files-found: warn retention-days: 30 compression-level: 0 @@ -137,28 +168,48 @@ jobs: - name: Install QEMU dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-utils genisoimage socat + sudo apt-get install -y qemu-system-x86 qemu-utils socat zstd + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install stack-cli deps + build + run: | + pnpm install --frozen-lockfile --filter @stackframe/stack-cli... + pnpm --filter @stackframe/stack-cli run build - name: Download built image uses: actions/download-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: docker/local-emulator/qemu/images/ + path: ${{ github.workspace }}/.stack-emulator-images/ - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs + - name: Place images into STACK_EMULATOR_HOME layout + run: | + mkdir -p "$HOME/.stack/emulator/images" + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" + if [ -f "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" ]; then + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" "$HOME/.stack/emulator/images/" + echo "Snapshot present — will test snapshot-resume path." + else + echo "No snapshot — will test cold-boot path." 
+ fi + ls -lh "$HOME/.stack/emulator/images/" - - name: Start emulator from artifact + - name: Start emulator via CLI run: | - chmod +x docker/local-emulator/qemu/run-emulator.sh docker/local-emulator/qemu/common.sh EMULATOR_ARCH=${{ matrix.arch }} \ EMULATOR_READY_TIMEOUT=600 \ - docker/local-emulator/qemu/run-emulator.sh start + node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh status + run: node packages/stack-cli/dist/index.js emulator status - name: Smoke test — backend health run: curl -sf http://localhost:26701/health?db=1 @@ -174,13 +225,11 @@ jobs: - name: Stop emulator if: always() - run: | - EMULATOR_ARCH=${{ matrix.arch }} \ - docker/local-emulator/qemu/run-emulator.sh stop + run: node packages/stack-cli/dist/index.js emulator stop - name: Print serial log on failure if: failure() - run: tail -100 docker/local-emulator/qemu/run/vm/serial.log 2>/dev/null || true + run: tail -100 $HOME/.stack/emulator/run/vm/serial.log 2>/dev/null || true publish: name: Publish to GitHub Releases @@ -211,6 +260,11 @@ jobs: for f in artifacts/qemu-emulator-*/*.qcow2; do cp "$f" release/ done + # savevm.zst is optional — older branches may not produce it. Skip + # missing files rather than failing the publish. + for f in artifacts/qemu-emulator-*/*.savevm.zst; do + [ -f "$f" ] && cp "$f" release/ + done cat > release-notes.md </dev/null || echo 0)" + raw_ts="$(stat -f '%m' "$raw" 2>/dev/null || echo 0)" + ;; + *) + zst_ts="$(stat -c '%Y' "$zst" 2>/dev/null || echo 0)" + raw_ts="$(stat -c '%Y' "$raw" 2>/dev/null || echo 0)" + ;; + esac + + if [ -s "$raw" ] && [ "$raw_ts" -ge "$zst_ts" ]; then + return 0 + fi + + log "Decompressing snapshot cache (one-time; ~2-3GB sparse)..." + local tmp="${raw}.tmp" + rm -f "$tmp" + if ! 
zstd -dc "$zst" > "$tmp"; then + err "Failed to decompress $zst" + rm -f "$tmp" + return 1 + fi + mv "$tmp" "$raw" +} + # Returns a fast fingerprint (size:mtime) of the base QEMU image. # Used to detect whether the image has changed since the overlay was created. base_image_fingerprint() { @@ -107,22 +152,14 @@ runtime_fingerprint() { printf '%s|%s\n' "$base_fp" "$savevm_fp" } -prepare_runtime_config_iso() { - local cfg_dir="$VM_DIR/runtime-config" +ensure_runtime_config_iso() { local cfg_iso cfg_iso="$(runtime_iso_path)" - rm -rf "$cfg_dir" - mkdir -p "$cfg_dir" - { - printf "STACK_EMULATOR_PORT_PREFIX=%s\n" "$PORT_PREFIX" - printf "STACK_EMULATOR_DASHBOARD_HOST_PORT=%s\n" "$EMULATOR_DASHBOARD_PORT" - printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" - printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" - printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" - printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" - } > "$cfg_dir/runtime.env" - cp "$SCRIPT_DIR/../.env.development" "$cfg_dir/base.env" - make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" + if [ ! -s "$cfg_iso" ]; then + err "Runtime config ISO missing at $cfg_iso." + err "The CLI normally generates this; if you're invoking run-emulator.sh directly, run via 'stack emulator start' instead." + exit 1 + fi } service_is_up() { @@ -274,7 +311,10 @@ build_qemu_cmd() { local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" if snapshot_available; then log "Snapshot found at $savevm_file — fast-resume enabled." - snapshot_args+=(-incoming "exec:zstd -dc $savevm_file") + # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. + # We use that to set mapped-ram + multifd capabilities before loading, + # which enables parallel RAM restore (~2-3x faster than streamed decode). 
+ snapshot_args+=(-incoming defer) snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" if [ "$snapshot_smp" != "$VM_CPUS" ]; then log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." @@ -389,7 +429,7 @@ ensure_ports_free() { start_vm() { mkdir -p "$VM_DIR" : > "$VM_DIR/serial.log" - prepare_runtime_config_iso + ensure_runtime_config_iso build_qemu_cmd "${QEMU_CMD[@]}" } @@ -411,12 +451,34 @@ qmp_send() { } | socat -t5 - "UNIX-CONNECT:$VM_DIR/monitor.sock" 2>/dev/null } -# After -incoming, QEMU is in "inmigrate" until the entire migration stream has -# been received. Sending `cont` mid-migration would abort it (the host-side -# decompressor / pipe gets killed). Wait for the VM to reach a runnable state -# (paused / postmigrate / prelaunch / running) before continuing. -qmp_wait_for_paused_and_continue() { - local deadline=$((SECONDS + 120)) +# After -incoming defer, QEMU waits for a migrate-incoming command. This sets +# up mapped-ram + multifd capabilities and kicks off the RAM load from the +# decompressed cache file. Returns once the VM is running. +qmp_incoming_and_cont() { + local raw_file="$1" + + # Set caps + parameters before migrate-incoming, same as source. + local setup_resp + setup_resp=$( { + printf '%s\n' '{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' + printf '%s\n' '{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' + } | qmp_send) + if printf '%s' "$setup_resp" | grep -q '"error"'; then + err "QMP caps setup failed: $setup_resp" + return 1 + fi + + # Kick off the incoming migration from the mapped-ram file. 
+ local inc_cmd inc_resp + inc_cmd=$(printf '{"execute":"migrate-incoming","arguments":{"uri":"file:%s"}}' "$raw_file") + inc_resp=$(printf '%s\n' "$inc_cmd" | qmp_send) + if printf '%s' "$inc_resp" | grep -q '"error"'; then + err "QMP migrate-incoming failed: $inc_resp" + return 1 + fi + + # Poll until status reaches a runnable state, then cont. + local deadline=$((SECONDS + 60)) while [ "$SECONDS" -lt "$deadline" ]; do local out status out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true) @@ -430,7 +492,6 @@ qmp_wait_for_paused_and_continue() { return 0 ;; inmigrate|"") - # still loading migration data ;; *) log "unexpected QMP status: $status" @@ -539,7 +600,6 @@ stop_vm() { fi fi rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log" - rm -rf "$VM_DIR/runtime-config" rm -f "$VM_DIR/runtime-config.iso" } @@ -553,6 +613,11 @@ cmd_start() { local using_snapshot=0 if snapshot_available; then + if ! ensure_savevm_raw; then + warn "Snapshot decompression failed — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi using_snapshot=1 fi @@ -561,8 +626,8 @@ cmd_start() { info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs" if [ "$using_snapshot" = "1" ]; then - log "Resuming from snapshot..." - if ! qmp_wait_for_paused_and_continue; then + log "Resuming from snapshot (mapped-ram + multifd)..." + if ! qmp_incoming_and_cont "$(savevm_raw_path)"; then warn "Snapshot resume did not reach a runnable state — falling back to cold boot." snapshot_fallback_to_cold_boot return @@ -575,23 +640,33 @@ cmd_start() { return fi - log "Generating fresh secrets + triggering rotation..." - if ! qga_trigger_fast_rotate; then - warn "Failed to trigger rotate-secrets — falling back to cold boot." - snapshot_fallback_to_cold_boot - return - fi + if [ "$EMULATOR_NO_ROTATION" = "1" ]; then + warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." + if ! 
wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not respond after resume — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi + else + log "Generating fresh secrets + triggering rotation..." + if ! qga_trigger_fast_rotate; then + warn "Failed to trigger rotate-secrets — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi - # Wait for the *new* backend (post-supervisor-restart) to actually be - # listening. all_ready may briefly return true against the OLD Node - # processes between when supervisor sends SIGTERM and when the children - # die; sleep a beat so we measure the real readiness. - sleep 1 - if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then - warn "Services did not recover after rotation — falling back to cold boot." - tail_vm_logs - snapshot_fallback_to_cold_boot - return + # Wait for the *new* backend (post-supervisor-restart) to actually be + # listening. all_ready may briefly return true against the OLD Node + # processes between when supervisor sends SIGTERM and when the children + # die; sleep a beat so we measure the real readiness. + sleep 1 + if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then + warn "Services did not recover after rotation — falling back to cold boot." + tail_vm_logs + snapshot_fallback_to_cold_boot + return + fi fi else if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then @@ -616,7 +691,11 @@ cmd_start() { snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm - rm -rf "$VM_DIR" + # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one, + # but keep the CLI-generated runtime-config.iso (we can't regenerate it + # from shell — the CLI owns that). 
+ rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ + "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 cmd_start } diff --git a/packages/stack-cli/package.json b/packages/stack-cli/package.json index 3f574e2413..57a8bfaae7 100644 --- a/packages/stack-cli/package.json +++ b/packages/stack-cli/package.json @@ -13,7 +13,8 @@ "build": "tsdown && node scripts/copy-emulator-assets.mjs", "dev": "tsdown --watch", "lint": "eslint --ext .tsx,.ts .", - "typecheck": "tsc --noEmit" + "typecheck": "tsc --noEmit", + "test": "vitest run" }, "files": [ "README.md", @@ -31,6 +32,7 @@ "@stackframe/js": "workspace:*", "@stackframe/stack-shared": "workspace:*", "commander": "^13.1.0", + "extract-zip": "^2.0.1", "jiti": "^2.4.2" }, "devDependencies": { diff --git a/packages/stack-cli/src/commands/emulator.test.ts b/packages/stack-cli/src/commands/emulator.test.ts new file mode 100644 index 0000000000..9cbe9caa16 --- /dev/null +++ b/packages/stack-cli/src/commands/emulator.test.ts @@ -0,0 +1,166 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + envPort, + formatBytes, + formatDuration, + platformInstallHint, + renderProgressLine, + resolveArch, +} from "./emulator.js"; + +describe("formatBytes", () => { + it("renders B / KB / MB / GB across unit boundaries", () => { + expect(formatBytes(0)).toBe("0 B"); + expect(formatBytes(1)).toBe("1 B"); + expect(formatBytes(1023)).toBe("1023 B"); + expect(formatBytes(1024)).toBe("1.0 KB"); + expect(formatBytes(1536)).toBe("1.5 KB"); + expect(formatBytes(1024 * 1024)).toBe("1.0 MB"); + expect(formatBytes(1024 * 1024 * 1024)).toBe("1.0 GB"); + expect(formatBytes(1024 * 1024 * 1024 * 1024)).toBe("1.0 TB"); + }); + + it("switches precision at v>=10 within a unit", () => { + expect(formatBytes(1024 * 10)).toBe("10 KB"); + expect(formatBytes(1024 * 9.5)).toBe("9.5 KB"); + }); + + it("returns '?' 
for non-finite and negative values", () => { + expect(formatBytes(NaN)).toBe("?"); + expect(formatBytes(Infinity)).toBe("?"); + expect(formatBytes(-1)).toBe("?"); + }); + + it("caps at TB for very large values", () => { + // Even if we exceed TB, we don't walk off the end of the units array. + const huge = 1024 ** 6; // exabyte-scale + expect(formatBytes(huge)).toMatch(/ TB$/); + }); +}); + +describe("formatDuration", () => { + it("uses s/m/h units at the right boundaries", () => { + expect(formatDuration(0)).toBe("0s"); + expect(formatDuration(59)).toBe("59s"); + expect(formatDuration(60)).toBe("1m00s"); + expect(formatDuration(61)).toBe("1m01s"); + expect(formatDuration(3599)).toBe("59m59s"); + expect(formatDuration(3600)).toBe("1h00m"); + expect(formatDuration(3660)).toBe("1h01m"); + }); + + it("rounds seconds to integers", () => { + expect(formatDuration(59.4)).toBe("59s"); + expect(formatDuration(59.9)).toBe("1m00s"); + }); + + it("returns '?' for non-finite and negative values", () => { + expect(formatDuration(NaN)).toBe("?"); + expect(formatDuration(Infinity)).toBe("?"); + expect(formatDuration(-1)).toBe("?"); + }); +}); + +describe("renderProgressLine", () => { + it("renders a known-size progress bar with percent, size, speed, and ETA", () => { + const line = renderProgressLine(1024, 2048, 512); + expect(line).toContain("50.0%"); + expect(line).toContain("/"); + expect(line).toContain("/s"); + expect(line).toContain("eta"); + }); + + it("hides the percent / ETA fields when total size is unknown (total=0)", () => { + const line = renderProgressLine(1024, 0, 512); + expect(line).not.toContain("%"); + expect(line).not.toContain("eta"); + expect(line).toContain("/s"); + }); + + it("clamps percent at 100 if downloaded overshoots total (rounding)", () => { + const line = renderProgressLine(2050, 2048, 100); + expect(line).toContain("100.0%"); + }); + + it("handles bytesPerSec = 0 by suppressing ETA", () => { + const line = renderProgressLine(512, 2048, 0); + 
expect(line).not.toContain("eta"); + }); +}); + +describe("envPort", () => { + const SAVED = process.env.__TEST_PORT; + beforeEach(() => { + delete process.env.__TEST_PORT; + }); + afterEach(() => { + if (SAVED === undefined) delete process.env.__TEST_PORT; + else process.env.__TEST_PORT = SAVED; + }); + + it("returns the fallback when the env var is not set", () => { + expect(envPort("__TEST_PORT", 1234)).toBe(1234); + }); + + it("parses a valid integer value", () => { + process.env.__TEST_PORT = "9876"; + expect(envPort("__TEST_PORT", 1234)).toBe(9876); + }); + + it("rejects zero and negative values", () => { + process.env.__TEST_PORT = "0"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + process.env.__TEST_PORT = "-5"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + }); + + it("rejects non-integer and non-numeric values", () => { + process.env.__TEST_PORT = "3.14"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + process.env.__TEST_PORT = "not-a-port"; + expect(() => envPort("__TEST_PORT", 1234)).toThrow(/Invalid __TEST_PORT/); + }); + + it("treats empty string as not set (returns fallback)", () => { + // Regression target: earlier versions sometimes parsed "" as 0 and threw. + process.env.__TEST_PORT = ""; + expect(envPort("__TEST_PORT", 1234)).toBe(1234); + }); +}); + +describe("resolveArch", () => { + it("accepts explicit arm64 / amd64", () => { + expect(resolveArch("arm64")).toBe("arm64"); + expect(resolveArch("amd64")).toBe("amd64"); + }); + + it("throws on unsupported explicit arch", () => { + expect(() => resolveArch("mips")).toThrow(/Invalid architecture/); + expect(() => resolveArch("x86")).toThrow(/Invalid architecture/); + }); + + it("maps the current process arch when raw is undefined", () => { + const expected = process.arch === "arm64" ? "arm64" : process.arch === "x64" ? 
"amd64" : null; + if (expected === null) { + expect(() => resolveArch()).toThrow(/Invalid architecture/); + } else { + expect(resolveArch()).toBe(expected); + } + }); +}); + +describe("platformInstallHint", () => { + it("uses brew on darwin and apt on linux", () => { + const spy = vi.spyOn(process, "platform", "get"); + try { + spy.mockReturnValue("darwin"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("brew install foo-mac"); + spy.mockReturnValue("linux"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("apt install foo-linux"); + spy.mockReturnValue("win32"); + expect(platformInstallHint("foo-linux", "foo-mac")).toContain("install foo-mac"); + } finally { + spy.mockRestore(); + } + }); +}); diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 7753cc86a5..5967a11038 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,5 +1,6 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; +import extract from "extract-zip"; import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; @@ -7,19 +8,36 @@ import { Readable } from "stream"; import { pipeline } from "stream/promises"; import { fileURLToPath } from "url"; import { CliError } from "../lib/errors.js"; +import { writeIso } from "../lib/iso.js"; const DEFAULT_EMULATOR_BACKEND_PORT = 26701; - -function emulatorBackendPort(): number { - const raw = process.env.EMULATOR_BACKEND_PORT; - if (!raw) return DEFAULT_EMULATOR_BACKEND_PORT; +const DEFAULT_EMULATOR_DASHBOARD_PORT = 26700; +const DEFAULT_EMULATOR_MINIO_PORT = 26702; +const DEFAULT_EMULATOR_INBUCKET_PORT = 26703; +const DEFAULT_PORT_PREFIX = "81"; +const GITHUB_API = "https://api.github.com"; +const DEFAULT_REPO = "stack-auth/stack-auth"; +const 
AARCH64_FIRMWARE_PATHS = [ + "/opt/homebrew/share/qemu/edk2-aarch64-code.fd", + "/usr/share/qemu/edk2-aarch64-code.fd", + "/usr/share/AAVMF/AAVMF_CODE.fd", + "/usr/share/qemu-efi-aarch64/QEMU_EFI.fd", +]; + +export function envPort(name: string, fallback: number): number { + const raw = process.env[name]; + if (!raw) return fallback; const parsed = Number(raw); if (!Number.isInteger(parsed) || parsed <= 0) { - throw new CliError(`Invalid EMULATOR_BACKEND_PORT: ${raw}`); + throw new CliError(`Invalid ${name}: ${raw}`); } return parsed; } +function emulatorBackendPort(): number { + return envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT); +} + function emulatorHome(): string { return process.env.STACK_EMULATOR_HOME ?? join(homedir(), ".stack", "emulator"); } @@ -84,15 +102,40 @@ async function fetchEmulatorCredentials(pck: string, backendPort: number, config }; } -function gh(args: string[]): string { +// Resolve a GitHub auth token. We try GITHUB_TOKEN first so users can pin a +// PAT, then fall back to `gh auth token` if the gh CLI is installed and +// signed in. If neither works we return undefined — public release downloads +// still work (anonymous, lower rate limit) but artifact downloads fail with a +// clear error at the call site. +function githubToken(): string | undefined { + if (process.env.GITHUB_TOKEN) return process.env.GITHUB_TOKEN; try { - return execFileSync("gh", args, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim(); - } catch (err: unknown) { - if (err instanceof Error && "stderr" in err && typeof err.stderr === "string") { - throw new CliError(`GitHub CLI error: ${err.stderr}`); - } - throw new CliError("GitHub CLI (gh) is required. 
Install: https://cli.github.com/"); + const out = execFileSync("gh", ["auth", "token"], { + encoding: "utf-8", + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + return out || undefined; + } catch { + return undefined; + } +} + +async function ghApi(path: string): Promise { + const token = githubToken(); + const headers: Record = { + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }; + if (token) headers.Authorization = `Bearer ${token}`; + const res = await fetch(`${GITHUB_API}${path}`, { headers }); + if (!res.ok) { + const body = await res.text().catch(() => ""); + const hint = res.status === 401 || res.status === 403 + ? " (set GITHUB_TOKEN or run `gh auth login` for higher rate limits / private access)" + : ""; + throw new CliError(`GitHub API ${res.status} ${res.statusText} for ${path}${hint}${body ? `: ${body.slice(0, 300)}` : ""}`); } + return await (res.json() as Promise); } function emulatorScriptsDir(): string { @@ -104,6 +147,16 @@ function emulatorScriptsDir(): string { throw new CliError("Emulator scripts not found in CLI bundle."); } +function baseEnvPath(): string { + // Lives one directory up from the scripts dir in both bundled and repo + // layouts (dist/.env.development vs docker/local-emulator/.env.development). + const path = resolve(emulatorScriptsDir(), "..", ".env.development"); + if (!existsSync(path)) { + throw new CliError(`Emulator base.env not found at ${path}`); + } + return path; +} + function emulatorSpawnEnv(extra?: Record): NodeJS.ProcessEnv { return { ...process.env, @@ -113,6 +166,33 @@ function emulatorSpawnEnv(extra?: Record): NodeJS.ProcessEnv { }; } +// Generate the runtime config ISO that the VM mounts via STACKCFG. Replaces +// the hdiutil/mkisofs/genisoimage host dep — see ../lib/iso.ts. +function prepareRuntimeConfigIso(): void { + const vmDir = join(emulatorRunDir(), "vm"); + mkdirSync(vmDir, { recursive: true }); + const portPrefix = process.env.PORT_PREFIX ?? 
process.env.NEXT_PUBLIC_STACK_PORT_PREFIX ?? DEFAULT_PORT_PREFIX; + const dashboardPort = envPort("EMULATOR_DASHBOARD_PORT", DEFAULT_EMULATOR_DASHBOARD_PORT); + const backendPort = envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT); + const minioPort = envPort("EMULATOR_MINIO_PORT", DEFAULT_EMULATOR_MINIO_PORT); + const inbucketPort = envPort("EMULATOR_INBUCKET_PORT", DEFAULT_EMULATOR_INBUCKET_PORT); + + const runtimeEnv = [ + `STACK_EMULATOR_PORT_PREFIX=${portPrefix}`, + `STACK_EMULATOR_DASHBOARD_HOST_PORT=${dashboardPort}`, + `STACK_EMULATOR_BACKEND_HOST_PORT=${backendPort}`, + `STACK_EMULATOR_MINIO_HOST_PORT=${minioPort}`, + `STACK_EMULATOR_INBUCKET_HOST_PORT=${inbucketPort}`, + `STACK_EMULATOR_VM_DIR_HOST=${vmDir}`, + "", + ].join("\n"); + const baseEnv = readFileSync(baseEnvPath()); + writeIso(join(vmDir, "runtime-config.iso"), "STACKCFG", [ + { name: "runtime.env", data: Buffer.from(runtimeEnv, "utf-8") }, + { name: "base.env", data: baseEnv }, + ]); +} + function runEmulator(action: string, env?: Record): Promise { const scriptsDir = emulatorScriptsDir(); mkdirSync(emulatorRunDir(), { recursive: true }); @@ -149,17 +229,21 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { console.log("No emulator image found. Pulling latest..."); await pullRelease(arch); } + prepareRuntimeConfigIso(); await runEmulator("start", { EMULATOR_ARCH: arch }); } -function resolveArch(raw?: string): "arm64" | "amd64" { +export function resolveArch(raw?: string): "arm64" | "amd64" { const arch = raw ?? (process.arch === "arm64" ? "arm64" : process.arch === "x64" ? "amd64" : null); if (arch === "arm64" || arch === "amd64") return arch; throw new CliError(`Invalid architecture: ${raw ?? process.arch}. 
Expected arm64 or amd64.`); } +type ReleaseAsset = { name: string, url: string, size: number }; +type ReleaseResponse = { assets: ReleaseAsset[] }; + async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) { - const repo = opts.repo ?? "stack-auth/stack-auth"; + const repo = opts.repo ?? DEFAULT_REPO; const branch = opts.branch ?? "dev"; const tag = opts.tag ?? `emulator-${branch}-latest`; const imageDir = emulatorImageDir(); @@ -171,39 +255,36 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc // back to a cold boot. const snapshotAsset = `stack-emulator-${arch}.savevm.zst`; - const assets = JSON.parse(gh(["release", "view", tag, "--repo", repo, "--json", "assets"])) as { - assets: { name: string, apiUrl: string, size: number }[], - }; - const diskMatch = assets.assets.find((a) => a.name === diskAsset); + const release = await ghApi(`/repos/${repo}/releases/tags/${tag}`); + const diskMatch = release.assets.find((a) => a.name === diskAsset); if (!diskMatch) { throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); } - const snapshotMatch = assets.assets.find((a) => a.name === snapshotAsset); - const token = gh(["auth", "token"]); + const snapshotMatch = release.assets.find((a) => a.name === snapshotAsset); + const token = githubToken(); - await downloadAsset(diskMatch, imageDir, diskAsset, token, tag); + await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag); if (snapshotMatch) { - await downloadAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); + await downloadReleaseAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); } else { console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); } } -async function downloadAsset( - match: { name: string, apiUrl: string, size: number }, +async function downloadReleaseAsset( + match: ReleaseAsset, imageDir: string, asset: string, - token: string, + token: string | undefined, tag: string, ): Promise { const dest = join(imageDir, asset); const tmpDest = `${dest}.download`; console.log(`Pulling ${asset} from release ${tag}...`); + const headers: Record = { Accept: "application/octet-stream" }; + if (token) headers.Authorization = `Bearer ${token}`; try { - await downloadWithProgress(match.apiUrl, { - Authorization: `Bearer ${token}`, - Accept: "application/octet-stream", - }, tmpDest, match.size); + await downloadWithProgress(match.url, headers, tmpDest, match.size); } catch (err) { if (existsSync(tmpDest)) unlinkSync(tmpDest); if (err instanceof CliError) throw err; @@ -248,7 +329,7 @@ async function downloadWithProgress(url: string, headers: Record if (isTty) process.stderr.write("\n"); } -function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { +export function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string { const barWidth = 30; const pct = total > 0 ? 
Math.min(100, (downloaded / total) * 100) : 0; const filled = total > 0 ? Math.round((downloaded / total) * barWidth) : 0; @@ -260,7 +341,7 @@ function renderProgressLine(downloaded: number, total: number, bytesPerSec: numb return ` [${bar}] ${pctStr} ${sizeStr} ${speedStr}${etaStr}`; } -function formatBytes(bytes: number): string { +export function formatBytes(bytes: number): string { if (!Number.isFinite(bytes) || bytes < 0) return "?"; const units = ["B", "KB", "MB", "GB", "TB"]; let v = bytes; @@ -272,7 +353,7 @@ function formatBytes(bytes: number): string { return `${v.toFixed(v < 10 && i > 0 ? 1 : 0)} ${units[i]}`; } -function formatDuration(seconds: number): string { +export function formatDuration(seconds: number): string { if (!Number.isFinite(seconds) || seconds < 0) return "?"; const s = Math.round(seconds); if (s < 60) return `${s}s`; @@ -284,6 +365,116 @@ function formatDuration(seconds: number): string { return `${h}h${rm.toString().padStart(2, "0")}m`; } +// --- Dependency preflight --------------------------------------------------- + +type BinarySpec = { name: string, install: string }; + +function commandExists(bin: string): boolean { + try { + execFileSync(process.platform === "win32" ? 
"where" : "which", [bin], { stdio: "pipe" }); + return true; + } catch { + return false; + } +} + +export function platformInstallHint(linuxPkg: string, macPkg: string): string { + switch (process.platform) { + case "darwin": { + return `brew install ${macPkg}`; + } + case "linux": { + return `apt install ${linuxPkg} (or your distro's equivalent)`; + } + default: { + return `install ${macPkg}`; + } + } +} + +function bin(name: string, linuxPkg: string, macPkg: string): BinarySpec { + return { name, install: platformInstallHint(linuxPkg, macPkg) }; +} + +function requireBinaries(commandName: string, bins: BinarySpec[]): void { + const missing = bins.filter((b) => !commandExists(b.name)); + if (missing.length === 0) return; + const lines = missing.map((b) => ` - ${b.name} → ${b.install}`); + throw new CliError( + `\`stack emulator ${commandName}\` requires the following missing binaries:\n${lines.join("\n")}`, + ); +} + +function warnIfMissing(commandName: string, bins: BinarySpec[]): void { + const missing = bins.filter((b) => !commandExists(b.name)); + if (missing.length === 0) return; + for (const b of missing) { + console.warn(`[stack emulator ${commandName}] optional dep '${b.name}' missing — feature degraded. 
Install: ${b.install}`); + } +} + +function aarch64FirmwareAvailable(): boolean { + return AARCH64_FIRMWARE_PATHS.some((p) => existsSync(p)); +} + +function commonVmBins(): BinarySpec[] { + return [ + bin("qemu-img", "qemu-utils", "qemu"), + bin("socat", "socat", "socat"), + bin("curl", "curl", "curl"), + bin("nc", "ncat", "netcat"), + bin("lsof", "lsof", "lsof"), + bin("openssl", "openssl", "openssl"), + ]; +} + +function archSpecificQemuBin(arch: "arm64" | "amd64"): BinarySpec { + if (arch === "arm64") { + return bin("qemu-system-aarch64", "qemu-system-arm", "qemu"); + } + return bin("qemu-system-x86_64", "qemu-system-x86", "qemu"); +} + +function preflightForVmStart(commandName: string, arch: "arm64" | "amd64"): void { + requireBinaries(commandName, [archSpecificQemuBin(arch), ...commonVmBins()]); + warnIfMissing(commandName, [bin("zstd", "zstd", "zstd")]); + if (arch === "arm64" && !aarch64FirmwareAvailable()) { + throw new CliError( + `aarch64 UEFI firmware not found. Looked in:\n${AARCH64_FIRMWARE_PATHS.map((p) => ` - ${p}`).join("\n")}\n` + + `Install: ${platformInstallHint("qemu-efi-aarch64", "qemu")}`, + ); + } +} + +// --- Workflow run / artifact downloads (replaces `gh run download`) --------- + +type WorkflowRunsResponse = { workflow_runs: { id: number }[] }; +type ArtifactsResponse = { artifacts: { id: number, name: string, size_in_bytes: number }[] }; +type PullResponse = { head: { ref: string } }; + +async function downloadArtifactByName(repo: string, runId: string, name: string, destDir: string): Promise { + const token = githubToken(); + if (!token) { + throw new CliError( + "Downloading workflow run artifacts requires authentication. 
Set GITHUB_TOKEN or run `gh auth login`.", + ); + } + const list = await ghApi(`/repos/${repo}/actions/runs/${runId}/artifacts?per_page=100`); + const match = list.artifacts.find((a) => a.name === name); + if (!match) return false; + const zipPath = join(destDir, `${name}.zip`); + console.log(`Downloading artifact '${name}' from run ${runId}...`); + await downloadWithProgress( + `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`, + { Accept: "application/octet-stream", Authorization: `Bearer ${token}` }, + zipPath, + match.size_in_bytes, + ); + await extract(zipPath, { dir: destDir }); + unlinkSync(zipPath); + return true; +} + export function registerEmulatorCommand(program: Command) { const emulator = program.command("emulator").description("Manage the QEMU local emulator"); @@ -298,16 +489,21 @@ export function registerEmulatorCommand(program: Command) { .option("--run ", "Pull from a specific workflow run's artifacts") .action(async (opts) => { const arch = resolveArch(opts.arch); - const repo = opts.repo ?? "stack-auth/stack-auth"; + const repo = opts.repo ?? 
DEFAULT_REPO; if (opts.run || opts.pr) { let runId = opts.run as string | undefined; if (!runId) { console.log(`Finding latest successful build for PR #${opts.pr}...`); - const { headRefName } = JSON.parse(gh(["pr", "view", opts.pr, "--repo", repo, "--json", "headRefName"])); - const runs = JSON.parse(gh(["run", "list", "--repo", repo, "--workflow", "qemu-emulator-build.yaml", "--branch", headRefName, "--status", "success", "--limit", "1", "--json", "databaseId"])); - if (runs.length === 0) throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`); - runId = String(runs[0].databaseId); + const pr = await ghApi(`/repos/${repo}/pulls/${opts.pr}`); + const headRefName = pr.head.ref; + const runs = await ghApi( + `/repos/${repo}/actions/workflows/qemu-emulator-build.yaml/runs?branch=${encodeURIComponent(headRefName)}&status=success&per_page=1`, + ); + if (runs.workflow_runs.length === 0) { + throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`); + } + runId = String(runs.workflow_runs[0].id); } const imageDir = emulatorImageDir(); @@ -316,21 +512,22 @@ export function registerEmulatorCommand(program: Command) { const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); if (existsSync(dest)) unlinkSync(dest); if (existsSync(snapshotDest)) unlinkSync(snapshotDest); - console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`); - try { - execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" }); - } catch (err) { - throw new CliError(`Failed to download artifact from run ${runId}: ${err instanceof Error ? 
err.message : err}`); + const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir); + if (!downloaded) { + throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`); } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); // Snapshot artifact is optional — older CI builds may not produce it. + let snapshotDownloaded = false; try { - execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}-savevm`, "--dir", imageDir], { stdio: "pipe" }); - if (existsSync(snapshotDest)) { - console.log(`Downloaded: ${snapshotDest}`); - } - } catch { + snapshotDownloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}-savevm`, imageDir); + } catch (err) { + console.log(`Snapshot artifact unavailable for run ${runId}: ${err instanceof Error ? err.message : err}`); + } + if (snapshotDownloaded && existsSync(snapshotDest)) { + console.log(`Downloaded: ${snapshotDest}`); + } else if (!snapshotDownloaded) { console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); } } else { @@ -345,6 +542,7 @@ export function registerEmulatorCommand(program: Command) { .option("--config-file ", "Path to a config file; when set, credentials for this project are printed to stdout as JSON") .action(async (opts: { arch?: string, configFile?: string }) => { const arch = resolveArch(opts.arch); + preflightForVmStart("start", arch); let resolvedConfigFile: string | undefined; if (opts.configFile) { @@ -375,6 +573,7 @@ export function registerEmulatorCommand(program: Command) { .option("--config-file ", "Path to a config file; fetches credentials and injects STACK_PROJECT_ID / STACK_PUBLISHABLE_CLIENT_KEY / STACK_SECRET_SERVER_KEY into the child") .action(async (cmd: string, opts: { arch?: string, configFile?: string }) => { const arch = resolveArch(opts.arch); + 
preflightForVmStart("run", arch); let resolvedConfigFile: string | undefined; if (opts.configFile) { @@ -429,18 +628,50 @@ export function registerEmulatorCommand(program: Command) { }); }); - emulator.command("stop").description("Stop the emulator (data preserved; use 'reset' to clear)").action(() => runEmulator("stop")); - emulator.command("reset").description("Reset emulator state for a fresh boot").action(() => runEmulator("reset")); - emulator.command("status").description("Show emulator and service health").action(() => runEmulator("status")); + emulator + .command("stop") + .description("Stop the emulator (data preserved; use 'reset' to clear)") + .action(() => { + requireBinaries("stop", [bin("socat", "socat", "socat")]); + return runEmulator("stop"); + }); + + emulator + .command("reset") + .description("Reset emulator state for a fresh boot") + .action(() => { + requireBinaries("reset", [bin("socat", "socat", "socat")]); + return runEmulator("reset"); + }); + + emulator + .command("status") + .description("Show emulator and service health") + .action(() => { + requireBinaries("status", [ + bin("curl", "curl", "curl"), + bin("nc", "ncat", "netcat"), + ]); + return runEmulator("status"); + }); emulator .command("list-releases") .description("List available emulator releases") .option("--repo ", "GitHub repository (default: stack-auth/stack-auth)") - .action((opts) => { - const repo = opts.repo ?? "stack-auth/stack-auth"; + .action(async (opts) => { + const repo = opts.repo ?? DEFAULT_REPO; console.log(`Available emulator releases from ${repo}:\n`); - const lines = gh(["release", "list", "--repo", repo, "--limit", "20"]).split("\n").filter((l) => l.toLowerCase().includes("emulator")); + type Release = { tag_name: string, name: string | null, published_at: string | null, draft: boolean, prerelease: boolean }; + const releases = await ghApi(`/repos/${repo}/releases?per_page=50`); + const lines = releases + .filter((r) => (r.tag_name + " " + (r.name ?? 
"")).toLowerCase().includes("emulator")) + .slice(0, 20) + .map((r) => { + const status = r.draft ? "Draft" : r.prerelease ? "Pre-release" : "Latest"; + const date = r.published_at ? r.published_at.slice(0, 10) : ""; + return `${r.tag_name}\t${status}\t${date}`; + }); if (lines.length === 0) console.log("No emulator releases found."); else for (const line of lines) console.log(line); }); diff --git a/packages/stack-cli/src/lib/iso.test.ts b/packages/stack-cli/src/lib/iso.test.ts new file mode 100644 index 0000000000..91f069e7a5 --- /dev/null +++ b/packages/stack-cli/src/lib/iso.test.ts @@ -0,0 +1,259 @@ +import { describe, expect, it } from "vitest"; +import { buildIso, type IsoFile } from "./iso.js"; + +const SECTOR = 2048; + +// --- Test helpers: a minimal ISO 9660 parser, just enough to walk the +// directory records we produce so tests can assert the bytes we emitted really +// are addressable at the offsets claimed in the directory records. + +function readSector(iso: Buffer, sector: number): Buffer { + return iso.subarray(sector * SECTOR, (sector + 1) * SECTOR); +} + +function readVolumeDescriptor(iso: Buffer, sector: number): { type: number, id: string } { + const buf = readSector(iso, sector); + return { type: buf[0], id: buf.toString("ascii", 1, 6) }; +} + +type DirRecord = { + lenDr: number, + extentSector: number, + dataLength: number, + isDir: boolean, + fileId: Buffer, +}; + +function parseDirRecords(sector: Buffer): DirRecord[] { + const records: DirRecord[] = []; + let offset = 0; + while (offset < sector.length) { + const lenDr = sector[offset]; + if (lenDr === 0) break; + const extentSector = sector.readUInt32LE(offset + 2); + const dataLength = sector.readUInt32LE(offset + 10); + const flags = sector[offset + 25]; + const lenFi = sector[offset + 32]; + const fileId = sector.subarray(offset + 33, offset + 33 + lenFi); + records.push({ + lenDr, + extentSector, + dataLength, + isDir: (flags & 0x02) !== 0, + fileId: Buffer.from(fileId), + }); + offset 
+= lenDr; + } + return records; +} + +// Follow PVD → root dir → pull file bytes by ISO-9660 name ("NAME.EXT;1"). +function readIsoFile(iso: Buffer, isoName: string): Buffer | null { + const pvd = readSector(iso, 16); + const rootSector = pvd.readUInt32LE(156 + 2); + const rootRecords = parseDirRecords(readSector(iso, rootSector)); + const match = rootRecords.find((r) => r.fileId.toString("ascii") === isoName); + if (!match) return null; + const start = match.extentSector * SECTOR; + return iso.subarray(start, start + match.dataLength); +} + +// Same, but follow the Joliet SVD (so names are UCS-2 BE). +function readJolietFile(iso: Buffer, name: string): Buffer | null { + const svd = readSector(iso, 17); + if (svd[0] !== 2) return null; + const rootSector = svd.readUInt32LE(156 + 2); + const rootRecords = parseDirRecords(readSector(iso, rootSector)); + const expected = Buffer.alloc(name.length * 2); + for (let i = 0; i < name.length; i++) expected.writeUInt16BE(name.charCodeAt(i), i * 2); + const match = rootRecords.find((r) => r.fileId.equals(expected)); + if (!match) return null; + const start = match.extentSector * SECTOR; + return iso.subarray(start, start + match.dataLength); +} + +function sampleFile(name: string, size: number, byte = 0x41): IsoFile { + return { name, data: Buffer.alloc(size, byte) }; +} + +describe("buildIso — structural invariants", () => { + it("emits the ISO 9660 standard identifiers at sectors 16, 17, 18", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hi") }]); + expect(readVolumeDescriptor(iso, 16)).toEqual({ type: 1, id: "CD001" }); + expect(readVolumeDescriptor(iso, 17)).toEqual({ type: 2, id: "CD001" }); + expect(readVolumeDescriptor(iso, 18)).toEqual({ type: 0xff, id: "CD001" }); + }); + + it("stores the volume identifier verbatim in the PVD for blkid discovery", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const pvd = readSector(iso, 16); + 
expect(pvd.toString("ascii", 40, 40 + 8)).toBe("STACKCFG"); + }); + + it("stores the volume identifier in the Joliet SVD as UCS-2 BE", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const svd = readSector(iso, 17); + const ucs = svd.subarray(40, 40 + 16); + let decoded = ""; + for (let i = 0; i < ucs.length; i += 2) decoded += String.fromCharCode(ucs.readUInt16BE(i)); + expect(decoded).toBe("STACKCFG"); + }); + + it("sets the Joliet escape sequence %/E", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]); + const svd = readSector(iso, 17); + expect(svd[88]).toBe(0x25); + expect(svd[89]).toBe(0x2f); + expect(svd[90]).toBe(0x45); + }); + + it("declares a volume space size equal to the emitted sector count", () => { + const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hello world") }]); + const pvd = readSector(iso, 16); + const declared = pvd.readUInt32LE(80); + expect(iso.length).toBe(declared * SECTOR); + }); +}); + +describe("buildIso — file round-trip", () => { + it("makes files readable by ISO 9660 name", () => { + const iso = buildIso("STACKCFG", [ + { name: "runtime.env", data: Buffer.from("KEY=value\n") }, + { name: "base.env", data: Buffer.from("FOO=bar\n") }, + ]); + expect(readIsoFile(iso, "RUNTIME.ENV;1")?.toString()).toBe("KEY=value\n"); + expect(readIsoFile(iso, "BASE.ENV;1")?.toString()).toBe("FOO=bar\n"); + }); + + it("makes files readable by Joliet (lowercase) name", () => { + const iso = buildIso("STACKCFG", [ + { name: "runtime.env", data: Buffer.from("KEY=value\n") }, + { name: "base.env", data: Buffer.from("FOO=bar\n") }, + ]); + expect(readJolietFile(iso, "runtime.env")?.toString()).toBe("KEY=value\n"); + expect(readJolietFile(iso, "base.env")?.toString()).toBe("FOO=bar\n"); + }); + + it("preserves exact file contents byte-for-byte", () => { + const content = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x41, 0x42, 0x43]); + const iso = 
buildIso("STACKCFG", [{ name: "bin.dat", data: content }]); + expect(readJolietFile(iso, "bin.dat")?.equals(content)).toBe(true); + }); + + it("handles files whose length is exactly one sector", () => { + const content = Buffer.alloc(SECTOR, 0x37); + const iso = buildIso("STACKCFG", [{ name: "one.bin", data: content }]); + expect(readJolietFile(iso, "one.bin")?.equals(content)).toBe(true); + }); + + it("handles files that span multiple sectors", () => { + const content = Buffer.alloc(SECTOR * 3 + 17, 0x55); + const iso = buildIso("STACKCFG", [{ name: "big.bin", data: content }]); + expect(readJolietFile(iso, "big.bin")?.equals(content)).toBe(true); + }); + + it("keeps files byte-exact at the claimed extent sector across multi-file layouts", () => { + // Fingerprint each file so we can tell them apart even if extents shift. + const files: IsoFile[] = [ + { name: "alpha.bin", data: Buffer.alloc(SECTOR + 5, 0xaa) }, + { name: "beta.bin", data: Buffer.alloc(SECTOR * 2, 0xbb) }, + { name: "gamma.bin", data: Buffer.alloc(42, 0xcc) }, + ]; + const iso = buildIso("STACKCFG", files); + for (const f of files) { + expect(readJolietFile(iso, f.name)?.equals(f.data)).toBe(true); + } + }); +}); + +describe("buildIso — edge cases", () => { + it("handles empty files without misaligning subsequent file extents", () => { + // Regression: `padToSector(Buffer.alloc(0))` used to return a 0-byte + // buffer, but the layout reserved 1 sector for the empty file — the next + // file was then read from the empty file's reserved slot. + const files: IsoFile[] = [ + { name: "empty.txt", data: Buffer.alloc(0) }, + { name: "after.txt", data: Buffer.from("marker\n") }, + ]; + const iso = buildIso("STACKCFG", files); + expect(readJolietFile(iso, "empty.txt")?.length).toBe(0); + expect(readJolietFile(iso, "after.txt")?.toString()).toBe("marker\n"); + // And: the declared volume space size must cover every emitted byte. 
+ const pvd = readSector(iso, 16); + expect(iso.length).toBe(pvd.readUInt32LE(80) * SECTOR); + }); + + it("writes the exact file length in the directory record (not padded to sector)", () => { + const content = Buffer.from("abc"); + const iso = buildIso("STACKCFG", [{ name: "tiny.txt", data: content }]); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)); + const file = records.find((r) => !r.isDir); + expect(file?.dataLength).toBe(3); + }); + + it("places the root directory records for . and .. pointing at the root extent", () => { + const iso = buildIso("STACKCFG", [{ name: "x.txt", data: Buffer.from("1") }]); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)); + expect(records.length).toBeGreaterThanOrEqual(2); + expect(records[0].fileId.equals(Buffer.from([0x00]))).toBe(true); + expect(records[1].fileId.equals(Buffer.from([0x01]))).toBe(true); + expect(records[0].isDir).toBe(true); + expect(records[0].extentSector).toBe(rootSector); + expect(records[1].extentSector).toBe(rootSector); + }); + + it("truncates volume identifiers longer than 32 bytes rather than corrupting the PVD", () => { + const longId = "A".repeat(64); + const iso = buildIso(longId, [{ name: "x.txt", data: Buffer.from("1") }]); + const pvd = readSector(iso, 16); + expect(pvd.toString("ascii", 40, 40 + 32)).toBe("A".repeat(32)); + // Sector 17 should still be the Joliet SVD, not clobbered. + expect(pvd[881]).toBe(1); + expect(readVolumeDescriptor(iso, 17).type).toBe(2); + }); + + it("rejects an input set whose root directory record overflows one sector", () => { + // Each Joliet dir record for an N-char name is 33 + 2N + (2N even ? 1 : 0) + // ≈ 2N + 34 bytes. A sector is 2048. Twenty 30-char names (94 bytes each) + // → 1880 bytes plus "." + ".." (68) → fits. Eighty → well over a sector.
+ const many: IsoFile[] = Array.from({ length: 80 }, (_, i) => ({ + name: `file-${String(i).padStart(3, "0")}-padding-padding.bin`, + data: Buffer.from("x"), + })); + expect(() => buildIso("STACKCFG", many)).toThrow(/Root directory exceeds/); + }); + + it("produces a sector-aligned buffer regardless of file sizes", () => { + for (const size of [0, 1, SECTOR - 1, SECTOR, SECTOR + 1, SECTOR * 5 - 1]) { + const iso = buildIso("STACKCFG", [sampleFile("a.bin", size)]); + expect(iso.length % SECTOR).toBe(0); + } + }); +}); + +describe("buildIso — multiple file sector layout", () => { + it("assigns non-overlapping extents to all files", () => { + const files: IsoFile[] = [ + sampleFile("a.bin", 10, 0x01), + sampleFile("b.bin", SECTOR, 0x02), + sampleFile("c.bin", SECTOR * 2 + 500, 0x03), + sampleFile("d.bin", 1, 0x04), + ]; + const iso = buildIso("STACKCFG", files); + const svd = readSector(iso, 17); + const rootSector = svd.readUInt32LE(156 + 2); + const records = parseDirRecords(readSector(iso, rootSector)).filter((r) => !r.isDir); + + // Extents must be strictly ordered and non-overlapping. + const sorted = [...records].sort((a, b) => a.extentSector - b.extentSector); + for (let i = 1; i < sorted.length; i++) { + const prev = sorted[i - 1]; + const prevEndSector = prev.extentSector + Math.max(1, Math.ceil(prev.dataLength / SECTOR)); + expect(sorted[i].extentSector).toBeGreaterThanOrEqual(prevEndSector); + } + }); +}); diff --git a/packages/stack-cli/src/lib/iso.ts b/packages/stack-cli/src/lib/iso.ts new file mode 100644 index 0000000000..b226af0bc7 --- /dev/null +++ b/packages/stack-cli/src/lib/iso.ts @@ -0,0 +1,403 @@ +// Minimal ISO 9660 + Joliet writer used to package the runtime config blob +// that the emulator VM mounts at boot via /dev/disk/by-label/STACKCFG. +// +// Replaces the host-side dependency on hdiutil/mkisofs/genisoimage. 
Only the +// subset of ECMA-119 needed for a single-level root directory of small UTF-8 +// text files is implemented: PVD + Joliet SVD + path tables + root dir + file +// data. Names are emitted in both ISO 9660 ("BASE.ENV;1") and Joliet +// (lower-case UCS-2) form so Linux mounts the Joliet view by default and the +// guest's `source /mnt/stack-runtime/runtime.env` works unchanged. + +import { writeFileSync } from "fs"; + +const SECTOR = 2048; + +function bothEndian32(n: number): Buffer { + const b = Buffer.alloc(8); + b.writeUInt32LE(n, 0); + b.writeUInt32BE(n, 4); + return b; +} + +function bothEndian16(n: number): Buffer { + const b = Buffer.alloc(4); + b.writeUInt16LE(n, 0); + b.writeUInt16BE(n, 2); + return b; +} + +function padString(s: string, len: number, fill = " "): Buffer { + const buf = Buffer.alloc(len, fill.charCodeAt(0)); + buf.write(s.slice(0, len), 0, "ascii"); + return buf; +} + +function ucs2BE(s: string): Buffer { + const buf = Buffer.alloc(s.length * 2); + for (let i = 0; i < s.length; i++) { + buf.writeUInt16BE(s.charCodeAt(i), i * 2); + } + return buf; +} + +function padUcs2BE(s: string, byteLen: number): Buffer { + const buf = Buffer.alloc(byteLen); + const wholeChars = Math.floor(byteLen / 2); + for (let i = 0; i < wholeChars; i++) { + buf.writeUInt16BE(i < s.length ? s.charCodeAt(i) : 0x0020, i * 2); + } + // Odd-length fields (e.g. 37-byte Copyright/Abstract/Bibliographic IDs) get + // a trailing space byte; spec allows either NUL or 0x20 padding. 
+ if (byteLen % 2 === 1) { + buf[byteLen - 1] = 0x20; + } + return buf; +} + +function dirRecordingDate(d: Date): Buffer { + const buf = Buffer.alloc(7); + buf[0] = d.getUTCFullYear() - 1900; + buf[1] = d.getUTCMonth() + 1; + buf[2] = d.getUTCDate(); + buf[3] = d.getUTCHours(); + buf[4] = d.getUTCMinutes(); + buf[5] = d.getUTCSeconds(); + buf[6] = 0; + return buf; +} + +function volumeDate(d: Date): Buffer { + const pad = (n: number, w: number) => String(n).padStart(w, "0"); + const s = + pad(d.getUTCFullYear(), 4) + + pad(d.getUTCMonth() + 1, 2) + + pad(d.getUTCDate(), 2) + + pad(d.getUTCHours(), 2) + + pad(d.getUTCMinutes(), 2) + + pad(d.getUTCSeconds(), 2) + + "00"; + const buf = Buffer.alloc(17); + buf.write(s, 0, 16, "ascii"); + buf[16] = 0; + return buf; +} + +const UNUSED_VOLUME_DATE = (() => { + const buf = Buffer.alloc(17, "0".charCodeAt(0)); + buf[16] = 0; + return buf; +})(); + +// Encodes an ISO 9660 file identifier ("FILENAME.EXT;1"). The name is +// uppercased here; pass the 8.3 name without the ";1" version suffix. +function isoFileIdentifier(name: string): Buffer { + const upper = name.toUpperCase(); + return Buffer.from(`${upper};1`, "ascii"); +} + +// Builds a single directory record. `idBytes` is the file identifier bytes +// (ASCII for ISO, UCS-2 BE for Joliet); for "." / ".." entries pass a single +// 0x00 / 0x01 byte as `idBytes`. +function buildDirRecord( + extentSector: number, + dataLength: number, + isDir: boolean, + recDate: Buffer, + idBytes: Buffer, +): Buffer { + const lenFi = idBytes.length; + const pad = lenFi % 2 === 0 ? 1 : 0; + const lenDr = 33 + lenFi + pad; + const buf = Buffer.alloc(lenDr); + buf[0] = lenDr; + buf[1] = 0; + bothEndian32(extentSector).copy(buf, 2); + bothEndian32(dataLength).copy(buf, 10); + recDate.copy(buf, 18); + buf[25] = isDir ?
0x02 : 0x00; + buf[26] = 0; + buf[27] = 0; + bothEndian16(1).copy(buf, 28); + buf[32] = lenFi; + idBytes.copy(buf, 33); + return buf; +} + +function buildRootDirEntries( + rootSector: number, + rootSize: number, + recDate: Buffer, + files: { idBytes: Buffer, sector: number, size: number }[], +): Buffer { + const records: Buffer[] = []; + records.push(buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00]))); + records.push(buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x01]))); + for (const f of files) { + records.push(buildDirRecord(f.sector, f.size, false, recDate, f.idBytes)); + } + + // Records may not span sector boundaries; pack them with sector padding. + const sectors: Buffer[] = []; + let current = Buffer.alloc(0); + for (const r of records) { + if (current.length + r.length > SECTOR) { + sectors.push(Buffer.concat([current, Buffer.alloc(SECTOR - current.length)])); + current = Buffer.alloc(0); + } + current = Buffer.concat([current, r]); + } + if (current.length > 0) { + sectors.push(Buffer.concat([current, Buffer.alloc(SECTOR - current.length)])); + } + return Buffer.concat(sectors); +} + +// Single-entry path table for the root directory. Used for both L (LE) and M +// (BE) tables; pass writeUInt32LE/BE accordingly. +function buildPathTable(rootSector: number, byteOrder: "LE" | "BE"): Buffer { + const buf = Buffer.alloc(10); + buf[0] = 1; // LEN_DI + buf[1] = 0; // EAR length + if (byteOrder === "LE") { + buf.writeUInt32LE(rootSector, 2); + buf.writeUInt16LE(1, 6); + } else { + buf.writeUInt32BE(rootSector, 2); + buf.writeUInt16BE(1, 6); + } + buf[8] = 0; // root identifier + buf[9] = 0; // pad + return buf; +} + +function padToSector(buf: Buffer): Buffer { + const rem = buf.length % SECTOR; + if (rem === 0) return buf; + return Buffer.concat([buf, Buffer.alloc(SECTOR - rem)]); +} + +// Build a Volume Descriptor (PVD or Joliet SVD). 
`joliet` switches volume-name +// fields to UCS-2 BE and sets the Joliet escape sequence. +function buildVolumeDescriptor(opts: { + joliet: boolean, + volumeId: string, + volumeSpaceSize: number, + pathTableSize: number, + lPathSector: number, + mPathSector: number, + rootDirRecord: Buffer, + date: Buffer, +}): Buffer { + const buf = Buffer.alloc(SECTOR); + buf[0] = opts.joliet ? 2 : 1; + buf.write("CD001", 1, 5, "ascii"); + buf[6] = 1; + buf[7] = 0; + + // System Identifier (32 bytes) + if (opts.joliet) { + padUcs2BE("", 32).copy(buf, 8); + } else { + padString("", 32).copy(buf, 8); + } + + // Volume Identifier (32 bytes) — must be "STACKCFG" so udev exposes it as + // /dev/disk/by-label/STACKCFG. blkid reads from PVD by default but Joliet + // takes precedence when both are present. + if (opts.joliet) { + padUcs2BE(opts.volumeId, 32).copy(buf, 40); + } else { + padString(opts.volumeId, 32).copy(buf, 40); + } + + bothEndian32(opts.volumeSpaceSize).copy(buf, 80); + + if (opts.joliet) { + // Escape sequence for UCS-2 Level 3 ("%/E") at offset 88 (32 bytes). + buf[88] = 0x25; + buf[89] = 0x2f; + buf[90] = 0x45; + } + + bothEndian16(1).copy(buf, 120); // Volume Set Size + bothEndian16(1).copy(buf, 124); // Volume Sequence Number + bothEndian16(SECTOR).copy(buf, 128); // Logical Block Size + bothEndian32(opts.pathTableSize).copy(buf, 132); + buf.writeUInt32LE(opts.lPathSector, 140); + buf.writeUInt32LE(0, 144); // optional L + buf.writeUInt32BE(opts.mPathSector, 148); + buf.writeUInt32BE(0, 152); // optional M + + opts.rootDirRecord.copy(buf, 156); + + const padFn = opts.joliet + ? 
(s: string, n: number) => padUcs2BE(s, n) + : (s: string, n: number) => padString(s, n); + + padFn("", 128).copy(buf, 190); // Volume Set Identifier + padFn("", 128).copy(buf, 318); // Publisher Identifier + padFn("", 128).copy(buf, 446); // Data Preparer Identifier + padFn("", 128).copy(buf, 574); // Application Identifier + padFn("", 37).copy(buf, 702); // Copyright File Identifier + padFn("", 37).copy(buf, 739); // Abstract File Identifier + padFn("", 37).copy(buf, 776); // Bibliographic File Identifier + + opts.date.copy(buf, 813); // Creation + opts.date.copy(buf, 830); // Modification + UNUSED_VOLUME_DATE.copy(buf, 847); // Expiration + UNUSED_VOLUME_DATE.copy(buf, 864); // Effective + + buf[881] = 1; // File Structure Version + return buf; +} + +function buildVolumeDescriptorTerminator(): Buffer { + const buf = Buffer.alloc(SECTOR); + buf[0] = 0xff; + buf.write("CD001", 1, 5, "ascii"); + buf[6] = 1; + return buf; +} + +// Builds the 34-byte root directory record that lives inside the volume +// descriptor (BP 157-190 of PVD/SVD). Identical layout to a regular directory +// record but identifier is the single byte 0x00. +function buildRootDirRecordInVD(rootSector: number, rootSize: number, recDate: Buffer): Buffer { + return buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00])); +} + +export type IsoFile = { name: string, data: Buffer }; + +export function buildIso(volumeId: string, files: IsoFile[]): Buffer { + const date = new Date(); + const recDate = dirRecordingDate(date); + const volDateBuf = volumeDate(date); + + // Compute per-file directory record sizes for both views. + const isoEntries = files.map((f) => ({ + file: f, + idBytes: isoFileIdentifier(f.name), + })); + const jolietEntries = files.map((f) => ({ + file: f, + idBytes: ucs2BE(f.name), + })); + + // We need root sector + size before we know file sectors — but file sectors + // depend only on the root dir size, which depends only on the file count. 
+ // Compute the root dir buffer twice if needed (sizes are stable since they + // depend only on identifier bytes, not on file extents). + const dirRecLen = (lenFi: number) => 33 + lenFi + (lenFi % 2 === 0 ? 1 : 0); + const isoRootSize = 34 + 34 + isoEntries.reduce((acc, e) => acc + dirRecLen(e.idBytes.length), 0); + const jolietRootSize = 34 + 34 + jolietEntries.reduce((acc, e) => acc + dirRecLen(e.idBytes.length), 0); + if (isoRootSize > SECTOR || jolietRootSize > SECTOR) { + throw new Error(`Root directory exceeds ${SECTOR} bytes; multi-sector root not supported.`); + } + + // Sector layout. + const sysAreaSectors = 16; + const pvdSector = sysAreaSectors; + const svdSector = pvdSector + 1; + const termSector = svdSector + 1; + const isoLPathSector = termSector + 1; + const isoMPathSector = isoLPathSector + 1; + const jolietLPathSector = isoMPathSector + 1; + const jolietMPathSector = jolietLPathSector + 1; + const isoRootSector = jolietMPathSector + 1; + const jolietRootSector = isoRootSector + 1; + let nextSector = jolietRootSector + 1; + + const fileLayout = files.map((f) => { + const sector = nextSector; + const sectors = Math.max(1, Math.ceil(f.data.length / SECTOR)); + nextSector += sectors; + return { file: f, sector, size: f.data.length }; + }); + + const totalSectors = nextSector; + const pathTableSize = 10; + + const isoRootDirRecordVD = buildRootDirRecordInVD(isoRootSector, SECTOR, recDate); + const jolietRootDirRecordVD = buildRootDirRecordInVD(jolietRootSector, SECTOR, recDate); + + const pvd = buildVolumeDescriptor({ + joliet: false, + volumeId, + volumeSpaceSize: totalSectors, + pathTableSize, + lPathSector: isoLPathSector, + mPathSector: isoMPathSector, + rootDirRecord: isoRootDirRecordVD, + date: volDateBuf, + }); + + const svd = buildVolumeDescriptor({ + joliet: true, + volumeId, + volumeSpaceSize: totalSectors, + pathTableSize, + lPathSector: jolietLPathSector, + mPathSector: jolietMPathSector, + rootDirRecord: jolietRootDirRecordVD, + date: 
volDateBuf, + }); + + const term = buildVolumeDescriptorTerminator(); + const isoLPath = padToSector(buildPathTable(isoRootSector, "LE")); + const isoMPath = padToSector(buildPathTable(isoRootSector, "BE")); + const jolietLPath = padToSector(buildPathTable(jolietRootSector, "LE")); + const jolietMPath = padToSector(buildPathTable(jolietRootSector, "BE")); + + const isoRoot = buildRootDirEntries( + isoRootSector, + SECTOR, + recDate, + isoEntries.map((e, i) => ({ + idBytes: e.idBytes, + sector: fileLayout[i].sector, + size: fileLayout[i].size, + })), + ); + const jolietRoot = buildRootDirEntries( + jolietRootSector, + SECTOR, + recDate, + jolietEntries.map((e, i) => ({ + idBytes: e.idBytes, + sector: fileLayout[i].sector, + size: fileLayout[i].size, + })), + ); + + // Each file must occupy the exact number of sectors the layout reserved for + // it. An empty file reserves 1 sector (via Math.max(1, …)) but + // padToSector(Buffer.alloc(0)) returns 0 bytes — that would desync every + // subsequent file's extent. Explicitly pad to the reserved size instead. 
+ const fileBuffers = fileLayout.map((f) => { + const reservedSectors = Math.max(1, Math.ceil(f.file.data.length / SECTOR)); + const reservedBytes = reservedSectors * SECTOR; + if (f.file.data.length === reservedBytes) return f.file.data; + const out = Buffer.alloc(reservedBytes); + f.file.data.copy(out, 0); + return out; + }); + + return Buffer.concat([ + Buffer.alloc(sysAreaSectors * SECTOR), + pvd, + svd, + term, + isoLPath, + isoMPath, + jolietLPath, + jolietMPath, + isoRoot, + jolietRoot, + ...fileBuffers, + ]); +} + +export function writeIso(path: string, volumeId: string, files: IsoFile[]): void { + const buf = buildIso(volumeId, files); + writeFileSync(path, buf); +} diff --git a/packages/stack-cli/vitest.config.ts b/packages/stack-cli/vitest.config.ts new file mode 100644 index 0000000000..0caeccbe44 --- /dev/null +++ b/packages/stack-cli/vitest.config.ts @@ -0,0 +1,19 @@ +import { defineConfig, mergeConfig } from 'vitest/config'; +import sharedConfig from '../../vitest.shared'; + +export default mergeConfig( + sharedConfig, + defineConfig({ + test: { + // Override the shared `maxWorkers: 8` — with it set, tinypool defaults + // minThreads to the host's available parallelism, producing + // "minThreads/maxThreads must not conflict" on machines with >8 cores. 
+ poolOptions: { + threads: { + minThreads: 1, + maxThreads: 4, + }, + }, + }, + }), +); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index dd890a4bd2..5c2364de0a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -737,7 +737,7 @@ importers: version: 1.166.6(crossws@0.4.4(srvx@0.8.16)) nitro: specifier: ^3.0.0 - version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2) + version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2) react: specifier: 19.2.1 version: 19.2.1 @@ -950,7 +950,7 @@ importers: devDependencies: mint: specifier: ^4.2.487 - version: 4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) + version: 4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) examples/cjs-test: dependencies: @@ -1498,10 +1498,10 @@ importers: version: link:../../packages/stack '@supabase/ssr': specifier: latest - version: 0.10.0(@supabase/supabase-js@2.101.1) + version: 0.10.0(@supabase/supabase-js@2.102.1) '@supabase/supabase-js': specifier: latest - version: 2.101.1 + version: 2.102.1 jose: specifier: ^5.2.2 version: 5.6.3 @@ -2024,6 +2024,9 @@ importers: commander: specifier: ^13.1.0 version: 13.1.0 + extract-zip: + specifier: ^2.0.1 + version: 2.0.1 jiti: specifier: ^2.4.2 version: 2.6.1 @@ -9760,23 +9763,23 @@ packages: 
resolution: {integrity: sha512-SXuhqhuR5FXaYgKTXzZJeqtVA6JKb9IZWaGeEUxHHiOcFy2p51wccO72bYpXwoK4D5pzQOIYLTuAc7etxyMmwg==} engines: {node: '>=12.16'} - '@supabase/auth-js@2.101.1': - resolution: {integrity: sha512-Kd0Wey+RkFHgyVep7adS6UOE2pN6MJ3mZ32PAXSvfw6IjUkFRC7IQpdZZjUOcUe5pXr1ejufCRgF6lsGINe4Tw==} + '@supabase/auth-js@2.102.1': + resolution: {integrity: sha512-2uH2WB0H98TOGDtaFWhxIcR42Dro/VB7VDZanz/4bVJsqioIue1m3TUqu3xciDm2W9r+1LXQvYNsYbQfWmD+uQ==} engines: {node: '>=20.0.0'} - '@supabase/functions-js@2.101.1': - resolution: {integrity: sha512-OZWU7YtaG+NNNFZK8p/FuJ6gpq7pFyrG2fLOopP73HAIDHDGpOttPJapvO8ADu3RkqfQfkwrB354vPkSBbZ20A==} + '@supabase/functions-js@2.102.1': + resolution: {integrity: sha512-UcrcKTPnAIo+Yp9Jjq9XXwFbsmgRYY637mwka9ZjmTIWcX/xr1pote4OVvaGQycVY1KTiQgjMvpC0Q0yJhRq3w==} engines: {node: '>=20.0.0'} '@supabase/phoenix@0.4.0': resolution: {integrity: sha512-RHSx8bHS02xwfHdAbX5Lpbo6PXbgyf7lTaXTlwtFDPwOIw64NnVRwFAXGojHhjtVYI+PEPNSWwkL90f4agN3bw==} - '@supabase/postgrest-js@2.101.1': - resolution: {integrity: sha512-UW1RajH5jbZoK+ldAJ1I6VZ+HWwZ2oaKjEQ6Gn+AQ67CHQVxGl8wNQoLYyumbyaExm41I+wn7arulcY1eHeZJw==} + '@supabase/postgrest-js@2.102.1': + resolution: {integrity: sha512-InLvXKAYf8BIqiv9jWOYudWB3rU8A9uMbcip5BQ5sLLNPrbO1Ekkr79OvlhZBgMNSppxVyC7wPPGzLxMcTZhlA==} engines: {node: '>=20.0.0'} - '@supabase/realtime-js@2.101.1': - resolution: {integrity: sha512-Oa6dno0OB9I+hv5do5zsZHbFu41ViZnE9IWjmkeeF/8fPmB5fWoHGqeTYEC3/0DAgtpUoFJa4FpvzFH0SBHo1Q==} + '@supabase/realtime-js@2.102.1': + resolution: {integrity: sha512-h2fCumib/v6u7XMwSPgxnpfimjX4xCEayUHrxWLC7UurfQjUZJ0pmJDgm6yj80DnUerxuulRghwm5zXYysFG/Q==} engines: {node: '>=20.0.0'} '@supabase/ssr@0.10.0': @@ -9784,12 +9787,12 @@ packages: peerDependencies: '@supabase/supabase-js': ^2.100.1 - '@supabase/storage-js@2.101.1': - resolution: {integrity: sha512-WhTaUOBgeEvnKLy95Cdlp6+D5igSF/65yC727w1olxbet5nzUvMlajKUWyzNtQu2efrz2cQ7FcdVBdQqgT9YKQ==} + '@supabase/storage-js@2.102.1': + resolution: {integrity: 
sha512-eCL9T4Xpe40nmKlkUJ7Zq/hk34db1xPiT0WL3Iv5MbJqHuCAe5TxhV8Rjqd6DNZrzjtfYObZtYl9jKJaHrivqw==} engines: {node: '>=20.0.0'} - '@supabase/supabase-js@2.101.1': - resolution: {integrity: sha512-Jnhm3LfuACwjIzvk2pfUbGQn7pa7hi6MFzfSyPrRYWVCCu69RPLCFyHSBl7HSBwadbQ3UZOznnD3gPca3ePrRA==} + '@supabase/supabase-js@2.102.1': + resolution: {integrity: sha512-bChxPVeLDnYN9M2d/u4fXsvylwSQG5grAl+HN8f+ZD9a9PuVU+Ru+xGmEsk+b9Iz3rJC9ZQnQUJYQ28fApdWYA==} engines: {node: '>=20.0.0'} '@sveltejs/sv-utils@0.0.3': @@ -11267,6 +11270,7 @@ packages: basic-ftp@5.2.0: resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==} engines: {node: '>=10.0.0'} + deprecated: Security vulnerability fixed in 5.2.1, please upgrade bcrypt@6.0.0: resolution: {integrity: sha512-cU8v/EGSrnH+HnxV2z0J7/blxH8gq7Xh2JFT6Aroax7UohdmiJJlxApMxtKfuI7z68NvvVcmR78k2LbT6efhRg==} @@ -13398,6 +13402,7 @@ packages: freestyle-sandboxes@0.1.6: resolution: {integrity: sha512-zfyJy+DgmheFjCAPYMklo7rpzvuxNP46rB0a9WfNBEmitYGE23nlbjyTy8qdrmVuCVCoMIDQQzzJRkyuh0Szqg==} + deprecated: This package has been deprecated. Please use freestyle instead. 
fresh@0.5.2: resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==} @@ -22478,16 +22483,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/checkbox@4.3.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/confirm@5.1.21(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22495,13 +22490,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/confirm@5.1.21(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/core@10.3.2(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22515,19 +22503,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/core@10.3.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - cli-width: 4.1.0 - mute-stream: 2.0.0 - signal-exit: 4.1.0 - wrap-ansi: 6.2.0 - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/editor@4.2.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22536,14 +22511,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/editor@4.2.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/external-editor': 1.0.3(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/expand@4.0.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22552,14 +22519,6 @@ snapshots: 
optionalDependencies: '@types/node': 20.17.6 - '@inquirer/expand@4.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/external-editor@1.0.3(@types/node@20.17.6)': dependencies: chardet: 2.1.1 @@ -22567,13 +22526,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/external-editor@1.0.3(@types/node@24.9.2)': - dependencies: - chardet: 2.1.1 - iconv-lite: 0.7.0 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/figures@1.0.15': {} '@inquirer/figures@1.0.3': {} @@ -22585,13 +22537,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/input@4.3.1(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/number@3.0.23(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22599,13 +22544,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/number@3.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/password@4.0.23(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22614,14 +22552,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/password@4.0.23(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/prompts@7.10.1(@types/node@20.17.6)': dependencies: '@inquirer/checkbox': 4.3.2(@types/node@20.17.6) @@ -22637,35 +22567,20 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - 
'@inquirer/prompts@7.10.1(@types/node@24.9.2)': - dependencies: - '@inquirer/checkbox': 4.3.2(@types/node@24.9.2) - '@inquirer/confirm': 5.1.21(@types/node@24.9.2) - '@inquirer/editor': 4.2.23(@types/node@24.9.2) - '@inquirer/expand': 4.0.23(@types/node@24.9.2) - '@inquirer/input': 4.3.1(@types/node@24.9.2) - '@inquirer/number': 3.0.23(@types/node@24.9.2) - '@inquirer/password': 4.0.23(@types/node@24.9.2) - '@inquirer/rawlist': 4.1.11(@types/node@24.9.2) - '@inquirer/search': 3.2.2(@types/node@24.9.2) - '@inquirer/select': 4.4.2(@types/node@24.9.2) - optionalDependencies: - '@types/node': 24.9.2 - - '@inquirer/prompts@7.9.0(@types/node@24.9.2)': + '@inquirer/prompts@7.9.0(@types/node@20.17.6)': dependencies: - '@inquirer/checkbox': 4.3.2(@types/node@24.9.2) - '@inquirer/confirm': 5.1.21(@types/node@24.9.2) - '@inquirer/editor': 4.2.23(@types/node@24.9.2) - '@inquirer/expand': 4.0.23(@types/node@24.9.2) - '@inquirer/input': 4.3.1(@types/node@24.9.2) - '@inquirer/number': 3.0.23(@types/node@24.9.2) - '@inquirer/password': 4.0.23(@types/node@24.9.2) - '@inquirer/rawlist': 4.1.11(@types/node@24.9.2) - '@inquirer/search': 3.2.2(@types/node@24.9.2) - '@inquirer/select': 4.4.2(@types/node@24.9.2) + '@inquirer/checkbox': 4.3.2(@types/node@20.17.6) + '@inquirer/confirm': 5.1.21(@types/node@20.17.6) + '@inquirer/editor': 4.2.23(@types/node@20.17.6) + '@inquirer/expand': 4.0.23(@types/node@20.17.6) + '@inquirer/input': 4.3.1(@types/node@20.17.6) + '@inquirer/number': 3.0.23(@types/node@20.17.6) + '@inquirer/password': 4.0.23(@types/node@20.17.6) + '@inquirer/rawlist': 4.1.11(@types/node@20.17.6) + '@inquirer/search': 3.2.2(@types/node@20.17.6) + '@inquirer/select': 4.4.2(@types/node@20.17.6) optionalDependencies: - '@types/node': 24.9.2 + '@types/node': 20.17.6 '@inquirer/rawlist@4.1.11(@types/node@20.17.6)': dependencies: @@ -22675,14 +22590,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/rawlist@4.1.11(@types/node@24.9.2)': - dependencies: - 
'@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/search@3.2.2(@types/node@20.17.6)': dependencies: '@inquirer/core': 10.3.2(@types/node@20.17.6) @@ -22692,15 +22599,6 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/search@3.2.2(@types/node@24.9.2)': - dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/select@4.4.2(@types/node@20.17.6)': dependencies: '@inquirer/ansi': 1.0.2 @@ -22711,24 +22609,10 @@ snapshots: optionalDependencies: '@types/node': 20.17.6 - '@inquirer/select@4.4.2(@types/node@24.9.2)': - dependencies: - '@inquirer/ansi': 1.0.2 - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/figures': 1.0.15 - '@inquirer/type': 3.0.10(@types/node@24.9.2) - yoctocolors-cjs: 2.1.3 - optionalDependencies: - '@types/node': 24.9.2 - '@inquirer/type@3.0.10(@types/node@20.17.6)': optionalDependencies: '@types/node': 20.17.6 - '@inquirer/type@3.0.10(@types/node@24.9.2)': - optionalDependencies: - '@types/node': 24.9.2 - '@isaacs/cliui@8.0.2': dependencies: string-width: 5.1.2 @@ -22866,9 +22750,9 @@ snapshots: dependencies: langium: 3.3.1 - '@mintlify/cli@4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0)': + '@mintlify/cli@4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0)': dependencies: - '@inquirer/prompts': 
7.9.0(@types/node@24.9.2) + '@inquirer/prompts': 7.9.0(@types/node@20.17.6) '@mintlify/common': 1.0.835(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) '@mintlify/link-rot': 3.0.1010(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) '@mintlify/prebuild': 1.0.977(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(react@19.2.3)(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) @@ -22881,7 +22765,7 @@ snapshots: front-matter: 4.0.2 fs-extra: 11.2.0 ink: 6.3.0(@types/react@18.3.12)(react@19.2.3) - inquirer: 12.3.0(@types/node@24.9.2) + inquirer: 12.3.0(@types/node@20.17.6) js-yaml: 4.1.0 mdast-util-mdx-jsx: 3.2.0 open: 8.4.2 @@ -29249,21 +29133,21 @@ snapshots: '@stripe/stripe-js@7.7.0': {} - '@supabase/auth-js@2.101.1': + '@supabase/auth-js@2.102.1': dependencies: tslib: 2.8.1 - '@supabase/functions-js@2.101.1': + '@supabase/functions-js@2.102.1': dependencies: tslib: 2.8.1 '@supabase/phoenix@0.4.0': {} - '@supabase/postgrest-js@2.101.1': + '@supabase/postgrest-js@2.102.1': dependencies: tslib: 2.8.1 - '@supabase/realtime-js@2.101.1': + '@supabase/realtime-js@2.102.1': dependencies: '@supabase/phoenix': 0.4.0 '@types/ws': 8.18.1 @@ -29273,23 +29157,23 @@ snapshots: - bufferutil - utf-8-validate - '@supabase/ssr@0.10.0(@supabase/supabase-js@2.101.1)': + '@supabase/ssr@0.10.0(@supabase/supabase-js@2.102.1)': dependencies: - '@supabase/supabase-js': 2.101.1 + '@supabase/supabase-js': 2.102.1 cookie: 1.0.2 - '@supabase/storage-js@2.101.1': 
+ '@supabase/storage-js@2.102.1': dependencies: iceberg-js: 0.8.1 tslib: 2.8.1 - '@supabase/supabase-js@2.101.1': + '@supabase/supabase-js@2.102.1': dependencies: - '@supabase/auth-js': 2.101.1 - '@supabase/functions-js': 2.101.1 - '@supabase/postgrest-js': 2.101.1 - '@supabase/realtime-js': 2.101.1 - '@supabase/storage-js': 2.101.1 + '@supabase/auth-js': 2.102.1 + '@supabase/functions-js': 2.102.1 + '@supabase/postgrest-js': 2.102.1 + '@supabase/realtime-js': 2.102.1 + '@supabase/storage-js': 2.102.1 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -30017,6 +29901,7 @@ snapshots: '@types/node@24.9.2': dependencies: undici-types: 7.16.0 + optional: true '@types/nodemailer@6.4.15': dependencies: @@ -33134,7 +33019,7 @@ snapshots: debug: 4.4.3 enhanced-resolve: 5.17.1 eslint: 8.57.1 - eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1) + eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) fast-glob: 3.3.3 get-tsconfig: 4.8.1 is-bun-module: 1.2.1 @@ -33177,7 +33062,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1): + 
eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): dependencies: debug: 3.2.7 optionalDependencies: @@ -33255,7 +33140,7 @@ snapshots: doctrine: 2.1.0 eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1) + eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) hasown: 2.0.2 is-core-module: 2.15.1 is-glob: 4.0.3 @@ -35159,12 +35044,12 @@ snapshots: react: 19.2.3 react-dom: 19.2.3(react@19.2.3) - inquirer@12.3.0(@types/node@24.9.2): + inquirer@12.3.0(@types/node@20.17.6): dependencies: - '@inquirer/core': 10.3.2(@types/node@24.9.2) - '@inquirer/prompts': 7.10.1(@types/node@24.9.2) - '@inquirer/type': 3.0.10(@types/node@24.9.2) - '@types/node': 24.9.2 + '@inquirer/core': 10.3.2(@types/node@20.17.6) + '@inquirer/prompts': 7.10.1(@types/node@20.17.6) + '@inquirer/type': 3.0.10(@types/node@20.17.6) + '@types/node': 20.17.6 ansi-escapes: 4.3.2 mute-stream: 2.0.0 run-async: 3.0.0 @@ -36641,9 +36526,9 @@ snapshots: dependencies: minipass: 7.1.2 - 
mint@4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0): + mint@4.2.487(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0): dependencies: - '@mintlify/cli': 4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@24.9.2)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) + '@mintlify/cli': 4.0.1090(@radix-ui/react-popover@1.1.15(@types/react-dom@18.3.1)(@types/react@18.3.12)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(@types/node@20.17.6)(@types/react@18.3.12)(encoding@0.1.13)(react-dom@19.2.3(react@19.2.3))(tsx@4.19.3)(typescript@5.9.3)(yaml@2.6.0) transitivePeerDependencies: - '@radix-ui/react-popover' - '@types/node' @@ -37086,7 +36971,7 @@ snapshots: jsonpath-plus: 10.4.0 lodash.topath: 4.5.2 - nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2): + nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2): dependencies: consola: 3.4.2 cookie-es: 2.0.0 @@ -37106,7 +36991,6 @@ snapshots: unenv: 2.0.0-rc.21 unstorage: 2.0.0-alpha.3(chokidar@4.0.3)(db0@0.3.4(@electric-sql/pglite@0.3.2)(mysql2@3.15.3))(lru-cache@11.2.2)(ofetch@1.5.1) optionalDependencies: - rolldown: 1.0.0-rc.3 
vite: 7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0) xml2js: 0.6.2 transitivePeerDependencies: @@ -40784,7 +40668,8 @@ snapshots: undici-types@6.21.0: {} - undici-types@7.16.0: {} + undici-types@7.16.0: + optional: true undici@6.19.8: {} From 6021a04bdefd45c3a36b4ff7d9cdb6233c1fbad0 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:28:51 -0700 Subject: [PATCH 04/25] build QEMU 10.2.2 from source in CI for mapped-ram support Ubuntu 24.04 (ubicloud-standard-8) ships QEMU 8.2, which predates the mapped-ram migration capability used by the fast-resume snapshot path. Compile 10.2.2 once per runner image and cache the resulting /opt/qemu so subsequent runs are fast. --- .github/workflows/qemu-emulator-build.yaml | 76 ++++++++++++++++++++-- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 4bb738124d..9d1078f0c6 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -66,10 +66,48 @@ jobs: node-version: 22 cache: pnpm - - name: Install QEMU dependencies + - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 zstd + # qemu-utils gives us qemu-img; qemu-efi-aarch64 provides the arm64 + # UEFI firmware. The actual qemu-system-* binaries come from the + # source build below — Ubuntu 24.04 ships QEMU 8.2 which predates + # the mapped-ram migration capability we rely on. + sudo apt-get install -y qemu-utils qemu-efi-aarch64 socat genisoimage zstd \ + ninja-build pkg-config python3-venv \ + libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev + + # QEMU 10.2.2 is required for the mapped-ram + multifd migration path + # used by the fast-resume snapshot. 
Cache the compiled prefix so CI + # only pays the ~5-8 min build cost once per runner image. + - name: Restore QEMU 10.2.2 cache + id: qemu-cache + uses: actions/cache@v4 + with: + path: /opt/qemu + key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1 + + - name: Build QEMU 10.2.2 from source + if: steps.qemu-cache.outputs.cache-hit != 'true' + run: | + set -euxo pipefail + curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz + mkdir -p /tmp/qemu-src + tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1 + cd /tmp/qemu-src + ./configure --prefix=/opt/qemu \ + --target-list=x86_64-softmmu,aarch64-softmmu \ + --enable-kvm --enable-slirp --enable-tcg \ + --disable-docs --disable-gtk --disable-sdl --disable-vnc \ + --disable-guest-agent --disable-tools + make -j"$(nproc)" + sudo make install + + - name: Put QEMU 10.2.2 on PATH + run: | + echo "/opt/qemu/bin" >> "$GITHUB_PATH" + /opt/qemu/bin/qemu-system-x86_64 --version + /opt/qemu/bin/qemu-system-aarch64 --version - name: Enable KVM access run: | @@ -165,10 +203,40 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install QEMU dependencies + - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-utils socat zstd + sudo apt-get install -y qemu-utils socat zstd \ + ninja-build pkg-config python3-venv \ + libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev + + - name: Restore QEMU 10.2.2 cache + id: qemu-cache + uses: actions/cache@v4 + with: + path: /opt/qemu + key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1 + + - name: Build QEMU 10.2.2 from source + if: steps.qemu-cache.outputs.cache-hit != 'true' + run: | + set -euxo pipefail + curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz + mkdir -p /tmp/qemu-src + tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1 + cd /tmp/qemu-src + ./configure --prefix=/opt/qemu \ + --target-list=x86_64-softmmu,aarch64-softmmu \ + 
--enable-kvm --enable-slirp --enable-tcg \ + --disable-docs --disable-gtk --disable-sdl --disable-vnc \ + --disable-guest-agent --disable-tools + make -j"$(nproc)" + sudo make install + + - name: Put QEMU 10.2.2 on PATH + run: | + echo "/opt/qemu/bin" >> "$GITHUB_PATH" + /opt/qemu/bin/qemu-system-x86_64 --version - uses: pnpm/action-setup@v4 with: From 0c0d726b2859fa6447fd395ba51ee6542dde03aa Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:50:55 -0700 Subject: [PATCH 05/25] build stack-cli's workspace deps in emulator CI Switch the CLI build step from `pnpm --filter @stackframe/stack-cli run build` to `turbo run build --filter=@stackframe/stack-cli...` so that stack-cli's workspace dependencies (@stackframe/js and @stackframe/stack-shared) also get compiled to their dist/ outputs. Without them, `node dist/index.js` fails with ERR_MODULE_NOT_FOUND at import time. --- .github/workflows/qemu-emulator-build.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 9d1078f0c6..3ed56b1472 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -138,8 +138,11 @@ jobs: - name: Build stack-cli (for emulator CLI) if: matrix.arch == 'amd64' run: | - pnpm install --frozen-lockfile --filter @stackframe/stack-cli... - pnpm --filter @stackframe/stack-cli run build + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' + # Turbo's trailing `...` filter builds stack-cli AND its workspace + # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli + # imports them at runtime from their dist/ outputs. + pnpm exec turbo run build --filter='@stackframe/stack-cli...' - name: Start emulator and verify if: matrix.arch == 'amd64' @@ -249,8 +252,11 @@ jobs: - name: Install stack-cli deps + build run: | - pnpm install --frozen-lockfile --filter @stackframe/stack-cli... 
- pnpm --filter @stackframe/stack-cli run build + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' + # Turbo's trailing `...` filter builds stack-cli AND its workspace + # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli + # imports them at runtime from their dist/ outputs. + pnpm exec turbo run build --filter='@stackframe/stack-cli...' - name: Download built image uses: actions/download-artifact@v4 From b03486e1c800dda9163438fe2a2b72bab6dee92b Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 13:59:38 -0700 Subject: [PATCH 06/25] fix emulator pull --pr/--run snapshot detection First downloadArtifactByName already extracts both qcow2 and savevm.zst from the single qemu-emulator-${arch} artifact; the second lookup for a nonexistent -savevm artifact always failed and produced a misleading 'fast-start disabled' message. --- packages/stack-cli/src/commands/emulator.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 5967a11038..00e8fdae60 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -518,17 +518,13 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - // Snapshot artifact is optional — older CI builds may not produce it. - let snapshotDownloaded = false; - try { - snapshotDownloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}-savevm`, imageDir); - } catch (err) { - console.log(`Snapshot artifact unavailable for run ${runId}: ${err instanceof Error ? 
err.message : err}`); - } - if (snapshotDownloaded && existsSync(snapshotDest)) { + // CI publishes both files inside the single qemu-emulator-${arch} + // artifact, so the first download already extracts the snapshot when + // present. Older builds may not include it. + if (existsSync(snapshotDest)) { console.log(`Downloaded: ${snapshotDest}`); - } else if (!snapshotDownloaded) { - console.log(`Snapshot artifact not available for run ${runId}; fast-start disabled.`); + } else { + console.log(`Snapshot not present in artifact for run ${runId}; fast-start disabled.`); } } else { await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); From 0b3a9cfaccaf8c4efab6c222113f3253b2f21950 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:07:34 -0700 Subject: [PATCH 07/25] fix sentinel marker path in docker/server entrypoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docker/server image runs as the unprivileged `node` user, which cannot write to /var/run. With `set -e` at the top of the script, the failed `touch` aborted execution after sentinel replacement but before the backend/dashboard were started — the Check server health CI step then saw connection refused on ports 8101/8102. Move the marker into $WORK_DIR (which is already created and owned by the running user). The emulator snapshot-resume path still benefits: the marker persists across supervisorctl restarts because $WORK_DIR lives on the container filesystem. --- docker/server/entrypoint.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 975c18975e..05072c330d 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -144,8 +144,10 @@ fi # The full-tree sentinel scan is expensive (several seconds over the whole built # app tree). 
On a fast-restart — triggered by the emulator snapshot rotation # path — the placeholders have already been sed-replaced by rotate-secrets, -# and no new sentinels need substitution. Skip the scan in that case. -SENTINEL_MARKER=/var/run/stack-local-sentinels-replaced +# and no new sentinels need substitution. Skip the scan in that case. Marker +# lives in WORK_DIR because the docker/server image runs as the unprivileged +# `node` user and cannot write to /var/run. +SENTINEL_MARKER="$WORK_DIR/.stack-sentinels-replaced" if [ -f "$SENTINEL_MARKER" ]; then echo "Sentinels already replaced on a previous start; skipping scan." else @@ -182,7 +184,7 @@ else # Now replace the sentinel with the (properly escaped) value in all files in the working directory. find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} + done - mkdir -p "$(dirname "$SENTINEL_MARKER")" && touch "$SENTINEL_MARKER" + touch "$SENTINEL_MARKER" fi # ============= START BACKEND AND DASHBOARD ============= From 2c8ad4c77a9588dad508351b4b1e7998a0f2aa9c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:30:21 -0700 Subject: [PATCH 08/25] address unresolved PR review comments on snapshot resume path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - stop_vm no longer deletes runtime-config.iso; the CLI owns its lifecycle and the snapshot → cold-boot fallback needs it preserved (cmd_reset still wipes RUN_DIR for a full reset). Also sweeps qga.sock. - Write internal-pck to \$VM_DIR on the host in snapshot mode. Cold boot publishes this via virtfs/9p; snapshot mode drops virtfs, so --config-file flows would otherwise hang. Handles both the rotation path (fresh PCK) and EMULATOR_NO_ROTATION (placeholder PCK). - Pin RAM in snapshot mode to the build-time 4096 (overridable via EMULATOR_SNAPSHOT_RAM). Migration replay requires an identical -m value, same constraint as CPU count. 
- Fail amd64 build when .savevm.zst is missing rather than shipping a cold-boot-only release silently. arm64 stays best-effort for now because it runs under TCG and can't be verified end-to-end. - Install Node/pnpm on both arches. arm64 also runs generate-env-development.mjs, which otherwise relied on the runner image's preinstalled Node. --- .github/workflows/qemu-emulator-build.yaml | 13 ++++- docker/local-emulator/qemu/run-emulator.sh | 64 ++++++++++++++++------ 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 3ed56b1472..e2298401d8 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -55,13 +55,14 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + # Node/pnpm are needed on both arches: arm64 also runs + # generate-env-development.mjs inside build-image.sh. amd64 additionally + # builds and runs the CLI for the verification steps below. - uses: pnpm/action-setup@v4 - if: matrix.arch == 'amd64' with: version: 10.23.0 - uses: actions/setup-node@v4 - if: matrix.arch == 'amd64' with: node-version: 22 cache: pnpm @@ -177,8 +178,14 @@ jobs: if [ -f "$SAVEVM" ]; then cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" + elif [ "${{ matrix.arch }}" = "amd64" ]; then + # amd64 is the fast-resume contract: if the build didn't produce a + # snapshot, fail loudly rather than silently shipping a + # cold-boot-only release. + echo "ERROR: snapshot build expected to produce $SAVEVM for amd64." >&2 + exit 1 else - echo "NOTE: no savevm snapshot was produced; fast-start will be unavailable for this arch." + echo "NOTE: no savevm snapshot was produced for ${{ matrix.arch }}; fast-start will be unavailable for this arch." 
fi - name: Upload image artifact diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 12564f369d..a49b10b428 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -308,7 +308,7 @@ build_qemu_cmd() { # build and are not needed at runtime, but their virtio-blk slots must # exist so the migration replay matches device IDs. Runtime-only devices # (virtfs, balloon) live at higher slots — extra at destination is fine. - local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" + local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM" if snapshot_available; then log "Snapshot found at $savevm_file — fast-resume enabled." # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. @@ -316,9 +316,17 @@ build_qemu_cmd() { # which enables parallel RAM restore (~2-3x faster than streamed decode). snapshot_args+=(-incoming defer) snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" + # RAM size is baked into the snapshot; migration replay requires an + # identical -m value. Pin to the build-time RAM (4096) and ignore + # EMULATOR_RAM — override via EMULATOR_SNAPSHOT_RAM if a different + # snapshot was produced. + snapshot_ram="${EMULATOR_SNAPSHOT_RAM:-4096}" if [ "$snapshot_smp" != "$VM_CPUS" ]; then log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)." fi + if [ "$snapshot_ram" != "$VM_RAM" ]; then + log "Pinning RAM to ${snapshot_ram}MB for snapshot resume (ignoring EMULATOR_RAM=${VM_RAM})." + fi # Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present # at snapshot time. 
Their content doesn't matter (cloud-init has already @@ -351,7 +359,7 @@ build_qemu_cmd() { -cpu "$cpu" "${firmware_args[@]}" -boot order=c - -m "$VM_RAM" + -m "$snapshot_ram" -smp "$snapshot_smp" -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio" "${runtime_only_args[@]}" @@ -502,14 +510,17 @@ qmp_incoming_and_cont() { return 1 } -# Generate fresh per-install secrets on the host. We pass them to the guest -# through QGA's guest-exec input-data field (base64-encoded), so no host file -# or virtfs mount is needed in the snapshot path. -generate_fresh_secrets_payload() { - printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$(openssl rand -hex 32)" - printf 'CRON_SECRET=%s\n' "$(openssl rand -hex 32)" +# Placeholder PCK baked into the snapshot. Kept in sync with the value in +# docker/local-emulator/qemu/cloud-init/emulator/user-data. +SNAPSHOT_PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff" + +# Write the internal PCK to the host path the CLI reads (see +# readInternalPck() in packages/stack-cli/src/commands/emulator.ts). In +# cold-boot mode the guest publishes this via virtfs/9p, but snapshot mode +# drops virtfs, so the host has to write it itself. +write_internal_pck_for_cli() { + local pck="$1" + (umask 077 && printf '%s' "$pck" > "$VM_DIR/internal-pck") } # Drive qemu-guest-agent via its virtserialport socket. QGA speaks the same @@ -547,8 +558,22 @@ qga_trigger_fast_rotate() { # message is available in serial.log. We pipe the fresh-secrets env file # (as base64) to the script via input-data — keeps secrets off the # filesystem and avoids needing virtfs. 
- local secrets_b64 resp pid - secrets_b64=$(generate_fresh_secrets_payload | base64 | tr -d '\n') + local fresh_pck fresh_ssk fresh_sak fresh_cron payload secrets_b64 resp pid + fresh_pck="$(openssl rand -hex 32)" + fresh_ssk="$(openssl rand -hex 32)" + fresh_sak="$(openssl rand -hex 32)" + fresh_cron="$(openssl rand -hex 32)" + payload=$( + printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$fresh_pck" + printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$fresh_ssk" + printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$fresh_sak" + printf 'CRON_SECRET=%s\n' "$fresh_cron" + ) + # Publish the fresh PCK to the host path the CLI reads. Writing before the + # guest-exec so a --config-file flow that polls from another process can + # pick it up the moment rotation completes. + write_internal_pck_for_cli "$fresh_pck" + secrets_b64=$(printf '%s' "$payload" | base64 | tr -d '\n') local cmd cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64") resp=$(printf '%s\n' "$cmd" | qga_send || true) @@ -599,8 +624,11 @@ stop_vm() { kill -9 "$pid" 2>/dev/null || true fi fi - rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log" - rm -f "$VM_DIR/runtime-config.iso" + rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log" + # Do NOT remove runtime-config.iso: the CLI owns its lifecycle and run-emulator.sh + # cannot regenerate it. Removing here breaks the snapshot → cold-boot fallback + # (which calls stop_vm before recursing into cmd_start → ensure_runtime_config_iso). + # `cmd_reset` wipes $RUN_DIR entirely when a full reset is wanted. } cmd_start() { @@ -642,6 +670,9 @@ cmd_start() { if [ "$EMULATOR_NO_ROTATION" = "1" ]; then warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." 
+ # The placeholder PCK is live in the running image; publish it to the + # host path so --config-file flows still work. + write_internal_pck_for_cli "$SNAPSHOT_PLACEHOLDER_PCK" if ! wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then warn "Services did not respond after resume — falling back to cold boot." tail_vm_logs @@ -691,9 +722,8 @@ cmd_start() { snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm - # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one, - # but keep the CLI-generated runtime-config.iso (we can't regenerate it - # from shell — the CLI owns that). + # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one. + # runtime-config.iso is preserved by stop_vm (the CLI owns it). rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 From 76f954353673e7e1e46bd73087039dba124027e8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 14:42:20 -0700 Subject: [PATCH 09/25] simplify emulator fast-start: tighter polls, drop dead wrappers - run-emulator.sh: drop wait_for_condition poll interval from 1s to 0.2s - emulator.ts: replace existsSync+readFileSync TOCTOU in readInternalPck with try/ENOENT; tighten initial backoff to 50ms; drop redundant mkdirSync in startEmulator; surface stop-failure on stderr instead of swallowing silently - iso.ts: inline trivial buildRootDirRecordInVD wrapper --- docker/local-emulator/qemu/run-emulator.sh | 2 +- packages/stack-cli/src/commands/emulator.ts | 16 +++++++++------- packages/stack-cli/src/lib/iso.ts | 14 +++++--------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index a49b10b428..75cbd3a4b4 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -212,7 +212,7 @@ 
wait_for_condition() { log "${label} ready in ${elapsed}s" return 0 fi - sleep 1 + sleep 0.2 elapsed=$((SECONDS - started)) printf "\r [%3ds] %s..." "$elapsed" "$label" done diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 00e8fdae60..3833cffeeb 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -57,11 +57,13 @@ function internalPckPath(): string { async function readInternalPck(timeoutMs = 60_000): Promise { const path = internalPckPath(); const deadline = Date.now() + timeoutMs; - let delay = 250; + let delay = 50; while (Date.now() < deadline) { - if (existsSync(path)) { + try { const contents = readFileSync(path, "utf-8").trim(); if (contents) return contents; + } catch (e) { + if ((e as NodeJS.ErrnoException).code !== "ENOENT") throw e; } await new Promise((r) => setTimeout(r, delay)); delay = Math.min(delay * 2, 2000); @@ -223,7 +225,6 @@ function isEmulatorRunning(): boolean { } async function startEmulator(arch: "arm64" | "amd64"): Promise { - mkdirSync(emulatorImageDir(), { recursive: true }); const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`); if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); @@ -518,9 +519,6 @@ export function registerEmulatorCommand(program: Command) { } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - // CI publishes both files inside the single qemu-emulator-${arch} - // artifact, so the first download already extracts the snapshot when - // present. Older builds may not include it. 
if (existsSync(snapshotDest)) { console.log(`Downloaded: ${snapshotDest}`); } else { @@ -617,8 +615,12 @@ export function registerEmulatorCommand(program: Command) { process.exit(exitCode); } else { console.log("\nStopping emulator..."); + const warnStopFailed = (e: unknown) => { + const msg = e instanceof Error ? e.message : String(e); + process.stderr.write(`Failed to stop emulator cleanly: ${msg}\n`); + }; runEmulator("stop") - .catch(() => { /* best-effort stop */ }) + .catch(warnStopFailed) .finally(() => process.exit(exitCode)); } }); diff --git a/packages/stack-cli/src/lib/iso.ts b/packages/stack-cli/src/lib/iso.ts index b226af0bc7..6b8ac1bb12 100644 --- a/packages/stack-cli/src/lib/iso.ts +++ b/packages/stack-cli/src/lib/iso.ts @@ -259,13 +259,6 @@ function buildVolumeDescriptorTerminator(): Buffer { return buf; } -// Builds the 34-byte root directory record that lives inside the volume -// descriptor (BP 157-190 of PVD/SVD). Identical layout to a regular directory -// record but identifier is the single byte 0x00. -function buildRootDirRecordInVD(rootSector: number, rootSize: number, recDate: Buffer): Buffer { - return buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([0x00])); -} - export type IsoFile = { name: string, data: Buffer }; export function buildIso(volumeId: string, files: IsoFile[]): Buffer { @@ -317,8 +310,11 @@ export function buildIso(volumeId: string, files: IsoFile[]): Buffer { const totalSectors = nextSector; const pathTableSize = 10; - const isoRootDirRecordVD = buildRootDirRecordInVD(isoRootSector, SECTOR, recDate); - const jolietRootDirRecordVD = buildRootDirRecordInVD(jolietRootSector, SECTOR, recDate); + // Root directory record inside the volume descriptor (BP 157-190 of PVD/SVD): + // same layout as a regular dir record but the identifier is the single byte 0x00. 
+ const rootIdent = Buffer.from([0x00]); + const isoRootDirRecordVD = buildDirRecord(isoRootSector, SECTOR, true, recDate, rootIdent); + const jolietRootDirRecordVD = buildDirRecord(jolietRootSector, SECTOR, true, recDate, rootIdent); const pvd = buildVolumeDescriptor({ joliet: false, From 3586115b0e8b9f99325d3db218f47164b9370d9d Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 16:07:26 -0700 Subject: [PATCH 10/25] fix snapshot resume host fs + restore standalone run-emulator.sh path Snapshot-resume mode dropped virtfs (QEMU disallows migration with 9p mounted), leaving the container's /host bind mount empty so the /local-emulator/project route returned 400. Hot-plug virtio-9p over a pre-attached PCIe root port after resume, mount in guest via QGA, and make /host a shared mount point with rshared docker propagation so the new mount reaches the running container without restart. run-emulator.sh ensure_runtime_config_iso now falls back to in-script ISO generation when invoked outside the CLI (fixes pnpm emulator:start). Also propagate fresh emulator credentials to VITE_/EXPO_PUBLIC_ env var prefixes in the CLI run command. --- docker/local-emulator/qemu/build-image.sh | 5 + .../qemu/cloud-init/emulator/user-data | 41 +++++-- docker/local-emulator/qemu/run-emulator.sh | 107 +++++++++++++++++- packages/stack-cli/src/commands/emulator.ts | 6 + 4 files changed, 148 insertions(+), 11 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 84ab0fa341..295a7972ee 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -477,6 +477,11 @@ build_one() { -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. 
+ # The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug + # only works through a root port. Must be present at snapshot capture + # so the resumed device tree matches. + -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1" ) # QEMU disallows migration when virtfs is mounted in the guest — virtfs # has guest-side state (open handles, mount table) that isn't migratable. diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index 8a968bf079..b3c21527b4 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -166,15 +166,40 @@ write_files: permissions: '0755' content: | #!/bin/bash + # Mount the host filesystem at /host. Two modes: + # (no args) — cold-boot: bind /host on itself, make it a shared + # mount point, then mount virtio-9p on top. The + # bind+shared step is what lets the docker bind + # mount (-v /host:/host:rshared) receive later + # propagation events. + # --post-resume — snapshot-resume: /host is already shared (set up + # at build time and preserved across the snapshot, + # plus the docker bind mount has rshared + # propagation). The host has just hot-plugged + # virtio-9p; mount it on /host and the new mount + # propagates into the running container. set -uo pipefail mkdir -p /host - if mountpoint -q /host; then - exit 0 + + # Idempotent: bind /host on itself once so it becomes a mount point + # with its own propagation, then make it shared. mount --make-shared + # requires a mount point, hence the bind first. + if ! 
mountpoint -q /host; then + mount --bind /host /host + fi + mount --make-shared /host + + if [ "${1:-}" = "--post-resume" ]; then + if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then + exit 0 + fi + echo "post-resume 9p mount failed" >&2 + exit 1 fi - # In snapshot-build mode the host detaches virtfs (QEMU disallows - # migration while it's mounted), and at runtime we re-attach it. Tolerate - # both states: try to mount, fall through to an empty /host if no - # virtio-9p channel is available. + + # Cold boot. In snapshot-build mode the host detaches virtfs (QEMU + # disallows migration while it's mounted), so the 9p mount may not be + # available — tolerate that and fall through to an empty /host. if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then exit 0 fi @@ -220,7 +245,7 @@ write_files: -v stack-clickhouse-data:/data/clickhouse \ -v stack-minio-data:/data/minio \ -v stack-inbucket-data:/data/inbucket \ - -v /host:/host \ + -v /host:/host:rshared \ stack-local-emulator 2>&1 | tee -a "$host_log" else exec docker run \ @@ -234,7 +259,7 @@ write_files: -v stack-clickhouse-data:/data/clickhouse \ -v stack-minio-data:/data/minio \ -v stack-inbucket-data:/data/inbucket \ - -v /host:/host \ + -v /host:/host:rshared \ stack-local-emulator fi diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 75cbd3a4b4..72f095cf57 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -155,11 +155,34 @@ runtime_fingerprint() { ensure_runtime_config_iso() { local cfg_iso cfg_iso="$(runtime_iso_path)" - if [ ! -s "$cfg_iso" ]; then - err "Runtime config ISO missing at $cfg_iso." - err "The CLI normally generates this; if you're invoking run-emulator.sh directly, run via 'stack emulator start' instead." + if [ -s "$cfg_iso" ]; then + return 0 + fi + + # Fallback used when this script is invoked directly (e.g. 
`pnpm + # emulator:start`) rather than through the stack-cli, which generates the + # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume + # label so the guest's render-stack-env mounts it the same way. + local base_env="$SCRIPT_DIR/../.env.development" + if [ ! -f "$base_env" ]; then + err "Cannot generate runtime config ISO: $base_env is missing." + err "Run 'pnpm run emulator:generate-env' first, or invoke via 'stack emulator start'." exit 1 fi + + local cfg_dir="$VM_DIR/runtime-config" + rm -rf "$cfg_dir" + mkdir -p "$cfg_dir" + { + printf "STACK_EMULATOR_PORT_PREFIX=%s\n" "$PORT_PREFIX" + printf "STACK_EMULATOR_DASHBOARD_HOST_PORT=%s\n" "$EMULATOR_DASHBOARD_PORT" + printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" + printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" + printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" + printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" + } > "$cfg_dir/runtime.env" + cp "$base_env" "$cfg_dir/base.env" + make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" } service_is_up() { @@ -371,6 +394,18 @@ build_qemu_cmd() { -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0" -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" + # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. + # MUST be the last explicit -device entry — slot order has to mirror + # build-image.sh exactly or migration replay stalls in inmigrate. + -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1" + # Pre-create the host-side fsdev backend so the post-resume QMP + # device_add can attach to it by id. -fsdev is host-only state — not + # part of the migrated device tree — so it's safe to add here even + # though the snapshot was captured without it. 
Going through -fsdev + # avoids the HMP fsdev_add command, whose error path is invisible + # via human-monitor-command (errors come back as a return string, + # not a QMP error). + -fsdev "local,id=hostfs,path=/,security_model=none" "${snapshot_args[@]}" -serial "file:$VM_DIR/serial.log" -display none @@ -552,6 +587,56 @@ qga_wait_ready() { return 1 } +# Hot-plug a virtio-9p device backed by host `/` after a snapshot resume. +# The snapshot was captured WITHOUT virtfs (QEMU disallows migration while +# 9p is mounted in the guest), so the resumed VM has no host filesystem +# available until we add one here. The fsdev backend was pre-created by +# the -fsdev option in build_qemu_cmd; we only need the device_add half. +qmp_hotplug_9p() { + local resp + resp=$(printf '%s\n' \ + '{"execute":"device_add","arguments":{"driver":"virtio-9p-pci","id":"hostfs-dev","fsdev":"hostfs","mount_tag":"hostfs","bus":"hostfs-port"}}' \ + | qmp_send) + if printf '%s' "$resp" | grep -q '"error"'; then + err "QMP device_add virtio-9p-pci failed: $resp" + return 1 + fi + return 0 +} + +# Run /usr/local/bin/mount-host-fs --post-resume in the guest. The script +# mounts the freshly-hot-plugged 9p device on /host, which is a shared +# mount point — so the new mount propagates into the running stack +# container's `-v /host:/host:rshared` bind mount without a container +# restart. 
+qga_mount_host_fs() { + local cmd resp pid status_resp exited exitcode + cmd='{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/mount-host-fs","arg":["--post-resume"],"capture-output":true}}' + resp=$(printf '%s\n' "$cmd" | qga_send || true) + pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') + if [ -z "$pid" ]; then + err "guest-exec mount-host-fs did not return a pid; response: $resp" + return 1 + fi + local deadline=$((SECONDS + 20)) + while [ "$SECONDS" -lt "$deadline" ]; do + status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true) + exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/') + if [ "$exited" = "true" ]; then + exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/') + if [ "${exitcode:-0}" = "0" ]; then + log "host fs mounted in guest" + return 0 + fi + err "mount-host-fs exited with code ${exitcode:-unknown}; response: $status_resp" + return 1 + fi + sleep 0.2 + done + err "mount-host-fs did not complete within 20s" + return 1 +} + qga_trigger_fast_rotate() { # guest-exec returns a pid; we then poll guest-exec-status until the # process exits, and surface its exit code. Capture output so a failure @@ -668,6 +753,22 @@ cmd_start() { return fi + # Hot-plug the host filesystem. The snapshot was captured without + # virtfs, so the running container has an empty /host bind mount until + # we add the 9p device and mount it in the guest. Required for routes + # like /local-emulator/project that read user-supplied paths via /host. + log "Hot-plugging host filesystem..." + if ! qmp_hotplug_9p; then + warn "Failed to hot-plug 9p device — falling back to cold boot." 
+ snapshot_fallback_to_cold_boot + return + fi + if ! qga_mount_host_fs; then + warn "Failed to mount host fs in guest — falling back to cold boot." + snapshot_fallback_to_cold_boot + return + fi + if [ "$EMULATOR_NO_ROTATION" = "1" ]; then warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance." # The placeholder PCK is live in the running image; publish it to the diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 3833cffeeb..12b3080892 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -592,11 +592,17 @@ export function registerEmulatorCommand(program: Command) { const apiUrl = `http://127.0.0.1:${backendPort}`; childEnv.STACK_PROJECT_ID = creds.project_id; childEnv.NEXT_PUBLIC_STACK_PROJECT_ID = creds.project_id; + childEnv.VITE_STACK_PROJECT_ID = creds.project_id; + childEnv.EXPO_PUBLIC_STACK_PROJECT_ID = creds.project_id; childEnv.STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; childEnv.NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; + childEnv.VITE_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; + childEnv.EXPO_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key; childEnv.STACK_SECRET_SERVER_KEY = creds.secret_server_key; childEnv.STACK_API_URL = apiUrl; childEnv.NEXT_PUBLIC_STACK_API_URL = apiUrl; + childEnv.VITE_STACK_API_URL = apiUrl; + childEnv.EXPO_PUBLIC_STACK_API_URL = apiUrl; } const child = spawn(cmd, { shell: true, stdio: "inherit", env: childEnv }); From 037755ba161dd0d0c4b2bf1f295474da7bdc0d2b Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 16:53:07 -0700 Subject: [PATCH 11/25] retry tsdown migration build to survive qemu-user futex hangs Cross-arch arm64-on-amd64 docker buildx runs the rolldown-backed tsdown build under qemu-user, whose futex emulation occasionally deadlocks the worker threads. 
Wrap the call in a bounded timeout + 3-attempt retry so a hang fails the layer in <11min and recovers on the next try. --- docker/local-emulator/Dockerfile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docker/local-emulator/Dockerfile b/docker/local-emulator/Dockerfile index 138270b405..603999b649 100644 --- a/docker/local-emulator/Dockerfile +++ b/docker/local-emulator/Dockerfile @@ -57,8 +57,22 @@ ENV NEXT_PUBLIC_STACK_STRIPE_PUBLISHABLE_KEY=pk_test_mock_publishable_key_for_lo # Build the backend NextJS app RUN pnpm turbo run docker-build --filter=@stackframe/backend... --filter=@stackframe/dashboard... -# Build the self-host seed script -RUN cd apps/backend && pnpm build-self-host-migration-script +# Build the self-host seed script. +# tsdown -> rolldown is multi-threaded Rust; under qemu-user (cross-arch +# arm64-on-amd64) its futex emulation occasionally deadlocks and the build +# hangs forever. Bound each attempt and retry to ride out the race. +RUN cd apps/backend && \ + attempt=1; \ + while :; do \ + timeout --kill-after=30s 600s pnpm build-self-host-migration-script && break; \ + rc=$?; \ + if [ "$attempt" -ge 3 ]; then \ + echo "build-self-host-migration-script failed after $attempt attempts (last rc=$rc)" >&2; \ + exit "$rc"; \ + fi; \ + echo "build-self-host-migration-script attempt $attempt failed (rc=$rc); retrying..." 
>&2; \ + attempt=$((attempt + 1)); \ + done # Prune node_modules for runtime: remove dev tools, heavy UI packages, From 894c1ce77cdce41450dffe788a0aaaf4db1fc3b8 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:27:33 -0700 Subject: [PATCH 12/25] fix CLI artifact download + build arm64 emulator on macOS runner - Fix 415 on artifact download: use application/vnd.github+json Accept header - Fix EACCES on run-emulator.sh: chmod +x at runtime (npm strips execute bit) - Move arm64 emulator build to a macOS-15 runner with HVF so the snapshot is portable to developer Macs (KVM snapshots from Linux are not resumable under HVF due to differing -cpu max feature sets) --- .../workflows/qemu-emulator-build-arm64.yaml | 134 ++++++++++++++++++ .github/workflows/qemu-emulator-build.yaml | 11 +- packages/stack-cli/src/commands/emulator.ts | 14 +- 3 files changed, 147 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/qemu-emulator-build-arm64.yaml diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml new file mode 100644 index 0000000000..c58e06d40f --- /dev/null +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -0,0 +1,134 @@ +name: Build QEMU Emulator Image (arm64 / macOS) + +# arm64 emulator images are built on a macOS Apple Silicon runner so the +# snapshot is captured under HVF — the same accelerator developer Macs use. +# KVM snapshots (from Linux runners) are NOT resumable under HVF because +# `-cpu max` expands to different feature sets under each accelerator. 
+ +on: + push: + branches: + - main + - dev + pull_request: + paths: + - 'docker/local-emulator/**' + - '.github/workflows/qemu-emulator-build-arm64.yaml' + workflow_dispatch: + +concurrency: + group: qemu-arm64-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} + +env: + EMULATOR_IMAGE_NAME: stack-local-emulator + EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images + EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run + +jobs: + build: + name: Build QEMU Image (arm64) + runs-on: macos-15 + timeout-minutes: 120 + + steps: + - uses: actions/checkout@v6 + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install system dependencies + run: brew install qemu socat zstd + + - name: Set up Docker via colima + run: | + brew install docker docker-buildx colima + mkdir -p ~/.docker/cli-plugins + ln -sfn "$(brew --prefix docker-buildx)/bin/docker-buildx" ~/.docker/cli-plugins/docker-buildx + colima start --cpu 4 --memory 6 --disk 60 --arch aarch64 + docker info + docker buildx version + + - name: Verify QEMU + HVF + run: | + qemu-system-aarch64 --version + if qemu-system-aarch64 -accel help 2>&1 | grep -q hvf; then + echo "HVF available — snapshot will be portable to developer Macs" + else + echo "::error::HVF not available on this runner" + exit 1 + fi + + - name: Build QEMU image + run: | + chmod +x docker/local-emulator/qemu/build-image.sh + EMULATOR_PROVISION_TIMEOUT=6000 \ + docker/local-emulator/qemu/build-image.sh arm64 + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + # HVF gives us native-speed arm64 — we can verify the image boots + # and services come up, unlike the old cross-arch TCG path. + - name: Build stack-cli + run: | + pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' 
+ pnpm exec turbo run build --filter='@stackframe/stack-cli...' + + - name: Start emulator and verify + env: + EMULATOR_ARCH: arm64 + EMULATOR_READY_TIMEOUT: 3200 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator start + + - name: Verify services are healthy + env: + EMULATOR_ARCH: arm64 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator status + + - name: Stop emulator + if: always() + env: + EMULATOR_ARCH: arm64 + EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} + EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} + run: node packages/stack-cli/dist/index.js emulator stop + + - name: Print serial log on failure + if: failure() + run: | + tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true + + - name: Package image + run: | + BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-arm64.qcow2" + SAVEVM="docker/local-emulator/qemu/images/stack-emulator-arm64.savevm.zst" + cp "$BASE_IMG" "stack-emulator-arm64.qcow2" + if [ -f "$SAVEVM" ]; then + cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" + ls -lh "stack-emulator-arm64.savevm.zst" + else + echo "::error::Snapshot was not produced — fast-start will be unavailable" + exit 1 + fi + + - name: Upload image artifact + uses: actions/upload-artifact@v4 + with: + name: qemu-emulator-arm64 + path: | + stack-emulator-arm64.qcow2 + stack-emulator-arm64.savevm.zst + if-no-files-found: error + retention-days: 30 + compression-level: 0 diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index e2298401d8..81cb509e4b 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -35,16 +35,11 @@ jobs: matrix: include: # amd64 runs natively under KVM on ubicloud's amd64 runner. 
+ # arm64 is built in a separate workflow on a macOS runner (HVF) + # so that the snapshot is portable to developer Macs. + # See qemu-emulator-build-arm64.yaml. - arch: amd64 runner: ubicloud-standard-8 - # arm64 runs under cross-arch TCG on ubicloud's amd64 runner. - # No KVM for arm64 guests on an amd64 host; cortex-a72 + V8 - # --jitless together sidestep the SIGTRAPs that cross-arch TCG - # hits on aggressive arm64 JIT code. Smoke test is still skipped - # because the backend can't come up reliably under cross-arch - # TCG within any sane window. - - arch: arm64 - runner: ubicloud-standard-8 steps: - uses: actions/checkout@v6 diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 12b3080892..9088bbc3f0 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -1,7 +1,7 @@ import { Command } from "commander"; import { execFileSync, spawn } from "child_process"; import extract from "extract-zip"; -import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; +import { chmodSync, createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs"; import { homedir } from "os"; import { dirname, join, resolve } from "path"; import { Readable } from "stream"; @@ -143,12 +143,18 @@ async function ghApi(path: string): Promise { function emulatorScriptsDir(): string { const here = dirname(fileURLToPath(import.meta.url)); const bundled = join(here, "emulator"); - if (existsSync(join(bundled, "run-emulator.sh"))) return bundled; + if (existsSync(join(bundled, "run-emulator.sh"))) return ensureExecutable(bundled); const repo = resolve(here, "../../../docker/local-emulator/qemu"); - if (existsSync(join(repo, "run-emulator.sh"))) return repo; + if (existsSync(join(repo, "run-emulator.sh"))) return ensureExecutable(repo); throw new CliError("Emulator scripts not found in CLI bundle."); } +// npm pack strips 
the execute bit from non-`bin` files, so restore it here. +function ensureExecutable(scriptsDir: string): string { + try { chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); } catch { /* best-effort */ } + return scriptsDir; +} + function baseEnvPath(): string { // Lives one directory up from the scripts dir in both bundled and repo // layouts (dist/.env.development vs docker/local-emulator/.env.development). @@ -467,7 +473,7 @@ async function downloadArtifactByName(repo: string, runId: string, name: string, console.log(`Downloading artifact '${name}' from run ${runId}...`); await downloadWithProgress( `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`, - { Accept: "application/octet-stream", Authorization: `Bearer ${token}` }, + { Accept: "application/vnd.github+json", Authorization: `Bearer ${token}` }, zipPath, match.size_in_bytes, ); From 54ecda8701a3d27a8770208ec8bd851c324639aa Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:31:57 -0700 Subject: [PATCH 13/25] fix colima on GHA macOS: use QEMU backend instead of VZ driver --- .github/workflows/qemu-emulator-build-arm64.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml index c58e06d40f..5018d22c1d 100644 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -49,9 +49,11 @@ jobs: - name: Set up Docker via colima run: | brew install docker docker-buildx colima - mkdir -p ~/.docker/cli-plugins - ln -sfn "$(brew --prefix docker-buildx)/bin/docker-buildx" ~/.docker/cli-plugins/docker-buildx - colima start --cpu 4 --memory 6 --disk 60 --arch aarch64 + # Wire up buildx as a CLI plugin + mkdir -p ~/.docker + echo '{"cliPluginsExtraDirs":["/opt/homebrew/lib/docker/cli-plugins"]}' > ~/.docker/config.json + # VZ driver doesn't work on GHA macOS runners — use QEMU backend + colima start --vm-type=qemu 
--cpu 4 --memory 6 --disk 60 --arch aarch64 docker info docker buildx version From 49a20ed019669fd4deef4fa350a95c57f660f271 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:36:50 -0700 Subject: [PATCH 14/25] split arm64 build: Docker on Linux, QEMU snapshot on macOS Docker is difficult to run on macOS CI runners (colima VZ and QEMU backends both crash). Split into two stages: 1. docker-build (Linux): builds arm64 Docker image, exports tarball 2. qemu-snapshot (macOS): provisions QEMU VM under HVF, captures snapshot Add SKIP_DOCKER_BUILD=1 to build-image.sh to reuse a pre-built bundle. --- .../workflows/qemu-emulator-build-arm64.yaml | 117 ++++++++++++------ docker/local-emulator/qemu/build-image.sh | 8 +- 2 files changed, 88 insertions(+), 37 deletions(-) diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml index 5018d22c1d..d50ed633f7 100644 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ b/.github/workflows/qemu-emulator-build-arm64.yaml @@ -1,9 +1,11 @@ name: Build QEMU Emulator Image (arm64 / macOS) -# arm64 emulator images are built on a macOS Apple Silicon runner so the -# snapshot is captured under HVF — the same accelerator developer Macs use. -# KVM snapshots (from Linux runners) are NOT resumable under HVF because -# `-cpu max` expands to different feature sets under each accelerator. +# arm64 emulator images are built in two stages: +# 1. docker-build (Linux): builds the Docker container image for arm64 and +# exports a tarball — Docker is painful to run on macOS CI runners. +# 2. qemu-snapshot (macOS): boots the image under HVF on Apple Silicon, +# provisions it, and captures a snapshot. HVF snapshots are portable to +# developer Macs; KVM snapshots are NOT (differing -cpu max features). 
on: push: @@ -22,14 +24,68 @@ concurrency: env: EMULATOR_IMAGE_NAME: stack-local-emulator - EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images - EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run jobs: - build: - name: Build QEMU Image (arm64) + # ---------- Stage 1: build Docker image on Linux ---------- + docker-build: + name: Build Docker Image (arm64) + runs-on: ubicloud-standard-8 + timeout-minutes: 60 + + steps: + - uses: actions/checkout@v6 + + - name: Set up QEMU user-mode emulation + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - uses: pnpm/action-setup@v4 + with: + version: 10.23.0 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + - name: Build arm64 Docker image + run: | + docker buildx build \ + --platform linux/arm64 \ + --tag "$EMULATOR_IMAGE_NAME" \ + --load \ + -f docker/local-emulator/Dockerfile \ + . 
+ + - name: Export Docker image bundle + run: | + mkdir -p /tmp/bundle + docker save "$EMULATOR_IMAGE_NAME" | gzip -c > /tmp/bundle/emulator-arm64-docker-images.tar.gz + docker image inspect --format '{{.ID}}' "$EMULATOR_IMAGE_NAME" > /tmp/bundle/emulator-arm64-docker-images.tar.gz.image-ids + ls -lh /tmp/bundle/ + + - name: Upload Docker bundle + uses: actions/upload-artifact@v4 + with: + name: arm64-docker-bundle + path: /tmp/bundle/ + retention-days: 1 + compression-level: 0 + + # ---------- Stage 2: QEMU provision + snapshot on macOS (HVF) ---------- + qemu-snapshot: + name: QEMU Snapshot (arm64 / HVF) + needs: docker-build runs-on: macos-15 timeout-minutes: 120 + env: + EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images + EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run steps: - uses: actions/checkout@v6 @@ -46,17 +102,6 @@ jobs: - name: Install system dependencies run: brew install qemu socat zstd - - name: Set up Docker via colima - run: | - brew install docker docker-buildx colima - # Wire up buildx as a CLI plugin - mkdir -p ~/.docker - echo '{"cliPluginsExtraDirs":["/opt/homebrew/lib/docker/cli-plugins"]}' > ~/.docker/config.json - # VZ driver doesn't work on GHA macOS runners — use QEMU backend - colima start --vm-type=qemu --cpu 4 --memory 6 --disk 60 --arch aarch64 - docker info - docker buildx version - - name: Verify QEMU + HVF run: | qemu-system-aarch64 --version @@ -67,17 +112,26 @@ jobs: exit 1 fi - - name: Build QEMU image + - name: Download Docker bundle + uses: actions/download-artifact@v4 + with: + name: arm64-docker-bundle + path: ${{ env.EMULATOR_IMAGE_DIR }}/ + + - name: Generate emulator env + run: node docker/local-emulator/generate-env-development.mjs + + - name: Build QEMU image (provision + snapshot) run: | chmod +x docker/local-emulator/qemu/build-image.sh + # SKIP_DOCKER_BUILD=1 tells build-image.sh to skip the Docker + # build + export steps — we already have the bundle from stage 
1. EMULATOR_PROVISION_TIMEOUT=6000 \ + SKIP_DOCKER_BUILD=1 \ docker/local-emulator/qemu/build-image.sh arm64 - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - # HVF gives us native-speed arm64 — we can verify the image boots - # and services come up, unlike the old cross-arch TCG path. + # HVF gives us native-speed arm64 — verify the image boots and + # services come up (previously impossible under cross-arch TCG). - name: Build stack-cli run: | pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' @@ -87,34 +141,27 @@ jobs: env: EMULATOR_ARCH: arm64 EMULATOR_READY_TIMEOUT: 3200 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator start - name: Verify services are healthy env: EMULATOR_ARCH: arm64 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator status - name: Stop emulator if: always() env: EMULATOR_ARCH: arm64 - EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }} - EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }} run: node packages/stack-cli/dist/index.js emulator stop - name: Print serial log on failure if: failure() - run: | - tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true + run: tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true - name: Package image run: | - BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-arm64.qcow2" - SAVEVM="docker/local-emulator/qemu/images/stack-emulator-arm64.savevm.zst" + BASE_IMG="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.qcow2" + SAVEVM="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.savevm.zst" cp "$BASE_IMG" "stack-emulator-arm64.qcow2" if [ -f "$SAVEVM" ]; then cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 295a7972ee..55d0cb2938 100755 --- 
a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -657,8 +657,12 @@ BUILD_ENV_FILE="$REPO_ROOT/docker/local-emulator/.env.development" for arch in "${TARGET_ARCHS[@]}"; do local_base="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2" download_cloud_image "$arch" "$local_base" - build_local_emulator_image "$arch" - prepare_bundle_artifacts "$arch" + if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then + log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" + else + build_local_emulator_image "$arch" + prepare_bundle_artifacts "$arch" + fi build_one "$arch" done From 11531ebc4b2ee0a372d67446d2f88d7612c56f10 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 18:41:13 -0700 Subject: [PATCH 15/25] fix check_deps: skip docker requirement when SKIP_DOCKER_BUILD=1 Also validate that the pre-built bundle exists before proceeding. --- docker/local-emulator/qemu/build-image.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 55d0cb2938..1b082d82c7 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -60,9 +60,12 @@ check_deps() { command -v "$qemu_bin" >/dev/null 2>&1 || missing+=("$qemu_bin") done - for cmd in qemu-img curl docker gzip; do + for cmd in qemu-img curl gzip; do command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done + if [ "${SKIP_DOCKER_BUILD:-0}" != "1" ]; then + command -v docker >/dev/null 2>&1 || missing+=("docker") + fi if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then for cmd in socat zstd; do @@ -659,6 +662,11 @@ for arch in "${TARGET_ARCHS[@]}"; do download_cloud_image "$arch" "$local_base" if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" + local expected_bundle="$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" + if [ ! 
-f "$expected_bundle" ]; then + err "Pre-built bundle not found: $expected_bundle" + exit 1 + fi else build_local_emulator_image "$arch" prepare_bundle_artifacts "$arch" From 753463702198c306eccaea6d7ca2c58582f7060d Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 19:12:52 -0700 Subject: [PATCH 16/25] fix lint warning + remove invalid `local` in top-level loop - Split single-line try/catch to satisfy max-statements-per-line - Remove `local` keyword from top-level for-loop (only valid in functions) --- docker/local-emulator/qemu/build-image.sh | 5 ++--- packages/stack-cli/src/commands/emulator.ts | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 1b082d82c7..5a8c4071ba 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -662,9 +662,8 @@ for arch in "${TARGET_ARCHS[@]}"; do download_cloud_image "$arch" "$local_base" if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle" - local expected_bundle="$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" - if [ ! -f "$expected_bundle" ]; then - err "Pre-built bundle not found: $expected_bundle" + if [ ! -f "$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" ]; then + err "Pre-built bundle not found: $IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" exit 1 fi else diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 9088bbc3f0..942b436cd7 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -151,7 +151,11 @@ function emulatorScriptsDir(): string { // npm pack strips the execute bit from non-`bin` files, so restore it here. 
function ensureExecutable(scriptsDir: string): string { - try { chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); } catch { /* best-effort */ } + try { + chmodSync(join(scriptsDir, "run-emulator.sh"), 0o755); + } catch { + // best-effort + } return scriptsDir; } From 288b80ee0d2f3253bb0dfc2783a89a290e609d1c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Wed, 15 Apr 2026 19:33:31 -0700 Subject: [PATCH 17/25] fix empty array expansion under bash 3.2 (macOS) macOS ships bash 3.2 which treats empty arrays as unbound under set -u. Use ${arr[@]+"${arr[@]}"} idiom for virtfs_args and snapshot_args. --- docker/local-emulator/qemu/build-image.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 5a8c4071ba..19476d658c 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -503,8 +503,8 @@ build_one() { -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ - "${virtfs_args[@]}" \ - "${snapshot_args[@]}" \ + ${virtfs_args[@]+"${virtfs_args[@]}"} \ + ${snapshot_args[@]+"${snapshot_args[@]}"} \ -serial "file:$serial_log" \ -display none \ -daemonize \ From d94aa661d83f4ad6f34d4e0b7e6ba484ba19c066 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 11:12:42 -0700 Subject: [PATCH 18/25] capture emulator snapshot locally during pull instead of shipping from CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU migration state isn't portable across accelerators — a KVM-captured snapshot won't resume under HVF, and `-cpu max` feature sets differ across hosts. 
Instead of trying to match every (KVM/HVF/TCG, amd64/arm64) combination in CI, capture the snapshot on the user's own machine during `stack emulator pull`: download the qcow2, cold-boot once, wait for all services, QMP migrate via mapped-ram + multifd, compress. Subsequent `stack emulator start`s fast-resume in ~3-8s as before. - Factor qmp_session + capture_vm_state out of build-image.sh into common.sh so run-emulator.sh can call them. - Add cmd_capture to run-emulator.sh. build_qemu_cmd emits the resume-compatible device layout (phantom ISOs, no virtfs, fsdev + pcie-root-port, pinned 4096MB/4CPU) with -incoming defer gated on an actual snapshot being present, so capture mode reuses the same path. - Capture regenerates runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty — virtfs is detached for migration compat so /host isn't mounted; the `install internal-pck → /host/$VM_DIR_HOST` path would otherwise fail and restart-loop stack.service. Mirrors build-image.sh's CI runtime.env shape. - stack-cli `pull` downloads only the qcow2 then invokes run-emulator.sh capture. Add --skip-snapshot for CI/debug. startEmulator auto-captures on the auto-pull fallback. - Revert the arm64 CI split: delete qemu-emulator-build-arm64.yaml, restore arm64 to the unified matrix on ubicloud-standard-8 under cross-arch TCG (macOS HVF runner existed only to produce a portable snapshot; no longer needed). Drop savevm.zst from package/upload/publish steps; update release notes. Verified end-to-end on an arm64 Mac under HVF: capture 50s, fast-resume 6.5s, all services green. 
--- .../workflows/qemu-emulator-build-arm64.yaml | 183 ------------------ .github/workflows/qemu-emulator-build.yaml | 58 ++---- docker/local-emulator/qemu/build-image.sh | 138 +------------ docker/local-emulator/qemu/common.sh | 139 +++++++++++++ docker/local-emulator/qemu/run-emulator.sh | 155 +++++++++++++-- packages/stack-cli/src/commands/emulator.ts | 54 ++++-- 6 files changed, 337 insertions(+), 390 deletions(-) delete mode 100644 .github/workflows/qemu-emulator-build-arm64.yaml diff --git a/.github/workflows/qemu-emulator-build-arm64.yaml b/.github/workflows/qemu-emulator-build-arm64.yaml deleted file mode 100644 index d50ed633f7..0000000000 --- a/.github/workflows/qemu-emulator-build-arm64.yaml +++ /dev/null @@ -1,183 +0,0 @@ -name: Build QEMU Emulator Image (arm64 / macOS) - -# arm64 emulator images are built in two stages: -# 1. docker-build (Linux): builds the Docker container image for arm64 and -# exports a tarball — Docker is painful to run on macOS CI runners. -# 2. qemu-snapshot (macOS): boots the image under HVF on Apple Silicon, -# provisions it, and captures a snapshot. HVF snapshots are portable to -# developer Macs; KVM snapshots are NOT (differing -cpu max features). 
- -on: - push: - branches: - - main - - dev - pull_request: - paths: - - 'docker/local-emulator/**' - - '.github/workflows/qemu-emulator-build-arm64.yaml' - workflow_dispatch: - -concurrency: - group: qemu-arm64-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} - -env: - EMULATOR_IMAGE_NAME: stack-local-emulator - -jobs: - # ---------- Stage 1: build Docker image on Linux ---------- - docker-build: - name: Build Docker Image (arm64) - runs-on: ubicloud-standard-8 - timeout-minutes: 60 - - steps: - - uses: actions/checkout@v6 - - - name: Set up QEMU user-mode emulation - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - uses: pnpm/action-setup@v4 - with: - version: 10.23.0 - - - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: pnpm - - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - - name: Build arm64 Docker image - run: | - docker buildx build \ - --platform linux/arm64 \ - --tag "$EMULATOR_IMAGE_NAME" \ - --load \ - -f docker/local-emulator/Dockerfile \ - . 
- - - name: Export Docker image bundle - run: | - mkdir -p /tmp/bundle - docker save "$EMULATOR_IMAGE_NAME" | gzip -c > /tmp/bundle/emulator-arm64-docker-images.tar.gz - docker image inspect --format '{{.ID}}' "$EMULATOR_IMAGE_NAME" > /tmp/bundle/emulator-arm64-docker-images.tar.gz.image-ids - ls -lh /tmp/bundle/ - - - name: Upload Docker bundle - uses: actions/upload-artifact@v4 - with: - name: arm64-docker-bundle - path: /tmp/bundle/ - retention-days: 1 - compression-level: 0 - - # ---------- Stage 2: QEMU provision + snapshot on macOS (HVF) ---------- - qemu-snapshot: - name: QEMU Snapshot (arm64 / HVF) - needs: docker-build - runs-on: macos-15 - timeout-minutes: 120 - env: - EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images - EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run - - steps: - - uses: actions/checkout@v6 - - - uses: pnpm/action-setup@v4 - with: - version: 10.23.0 - - - uses: actions/setup-node@v4 - with: - node-version: 22 - cache: pnpm - - - name: Install system dependencies - run: brew install qemu socat zstd - - - name: Verify QEMU + HVF - run: | - qemu-system-aarch64 --version - if qemu-system-aarch64 -accel help 2>&1 | grep -q hvf; then - echo "HVF available — snapshot will be portable to developer Macs" - else - echo "::error::HVF not available on this runner" - exit 1 - fi - - - name: Download Docker bundle - uses: actions/download-artifact@v4 - with: - name: arm64-docker-bundle - path: ${{ env.EMULATOR_IMAGE_DIR }}/ - - - name: Generate emulator env - run: node docker/local-emulator/generate-env-development.mjs - - - name: Build QEMU image (provision + snapshot) - run: | - chmod +x docker/local-emulator/qemu/build-image.sh - # SKIP_DOCKER_BUILD=1 tells build-image.sh to skip the Docker - # build + export steps — we already have the bundle from stage 1. 
- EMULATOR_PROVISION_TIMEOUT=6000 \ - SKIP_DOCKER_BUILD=1 \ - docker/local-emulator/qemu/build-image.sh arm64 - - # HVF gives us native-speed arm64 — verify the image boots and - # services come up (previously impossible under cross-arch TCG). - - name: Build stack-cli - run: | - pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...' - pnpm exec turbo run build --filter='@stackframe/stack-cli...' - - - name: Start emulator and verify - env: - EMULATOR_ARCH: arm64 - EMULATOR_READY_TIMEOUT: 3200 - run: node packages/stack-cli/dist/index.js emulator start - - - name: Verify services are healthy - env: - EMULATOR_ARCH: arm64 - run: node packages/stack-cli/dist/index.js emulator status - - - name: Stop emulator - if: always() - env: - EMULATOR_ARCH: arm64 - run: node packages/stack-cli/dist/index.js emulator stop - - - name: Print serial log on failure - if: failure() - run: tail -100 "$EMULATOR_RUN_DIR/vm/serial.log" 2>/dev/null || true - - - name: Package image - run: | - BASE_IMG="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.qcow2" - SAVEVM="$EMULATOR_IMAGE_DIR/stack-emulator-arm64.savevm.zst" - cp "$BASE_IMG" "stack-emulator-arm64.qcow2" - if [ -f "$SAVEVM" ]; then - cp "$SAVEVM" "stack-emulator-arm64.savevm.zst" - ls -lh "stack-emulator-arm64.savevm.zst" - else - echo "::error::Snapshot was not produced — fast-start will be unavailable" - exit 1 - fi - - - name: Upload image artifact - uses: actions/upload-artifact@v4 - with: - name: qemu-emulator-arm64 - path: | - stack-emulator-arm64.qcow2 - stack-emulator-arm64.savevm.zst - if-no-files-found: error - retention-days: 30 - compression-level: 0 diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 81cb509e4b..380a8ab1fc 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -34,12 +34,18 @@ jobs: fail-fast: false matrix: include: - # amd64 runs natively under KVM on ubicloud's amd64 runner. 
- # arm64 is built in a separate workflow on a macOS runner (HVF) - # so that the snapshot is portable to developer Macs. - # See qemu-emulator-build-arm64.yaml. + # Both arches build on ubicloud's amd64 runner. amd64 uses KVM; + # arm64 runs under cross-arch TCG (slow, but only cloud-init + # provisioning has to complete — the boot/verify smoke test below + # is gated to amd64 because TCG can't boot Next.js in any + # reasonable time). Snapshots are NOT published — `stack emulator + # pull` captures one locally on first run, which is the only way + # to guarantee KVM/HVF/TCG + `-cpu max` compatibility on the + # user's machine. - arch: amd64 runner: ubicloud-standard-8 + - arch: arm64 + runner: ubicloud-standard-8 steps: - uses: actions/checkout@v6 @@ -168,29 +174,15 @@ jobs: - name: Package image run: | BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2" - SAVEVM="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.savevm.zst" cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2" - if [ -f "$SAVEVM" ]; then - cp "$SAVEVM" "stack-emulator-${{ matrix.arch }}.savevm.zst" - ls -lh "stack-emulator-${{ matrix.arch }}.savevm.zst" - elif [ "${{ matrix.arch }}" = "amd64" ]; then - # amd64 is the fast-resume contract: if the build didn't produce a - # snapshot, fail loudly rather than silently shipping a - # cold-boot-only release. - echo "ERROR: snapshot build expected to produce $SAVEVM for amd64." >&2 - exit 1 - else - echo "NOTE: no savevm snapshot was produced for ${{ matrix.arch }}; fast-start will be unavailable for this arch." 
- fi + ls -lh "stack-emulator-${{ matrix.arch }}.qcow2" - name: Upload image artifact uses: actions/upload-artifact@v4 with: name: qemu-emulator-${{ matrix.arch }} - path: | - stack-emulator-${{ matrix.arch }}.qcow2 - stack-emulator-${{ matrix.arch }}.savevm.zst - if-no-files-found: warn + path: stack-emulator-${{ matrix.arch }}.qcow2 + if-no-files-found: error retention-days: 30 compression-level: 0 @@ -266,18 +258,14 @@ jobs: name: qemu-emulator-${{ matrix.arch }} path: ${{ github.workspace }}/.stack-emulator-images/ - - name: Place images into STACK_EMULATOR_HOME layout + - name: Place qcow2 into STACK_EMULATOR_HOME layout run: | mkdir -p "$HOME/.stack/emulator/images" cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" - if [ -f "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" ]; then - cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.savevm.zst" "$HOME/.stack/emulator/images/" - echo "Snapshot present — will test snapshot-resume path." - else - echo "No snapshot — will test cold-boot path." - fi ls -lh "$HOME/.stack/emulator/images/" + # No savevm.zst artifact (users capture locally via `emulator pull`), + # so `emulator start` cold-boots the qcow2. Budget accordingly. - name: Start emulator via CLI run: | EMULATOR_ARCH=${{ matrix.arch }} \ @@ -336,11 +324,6 @@ jobs: for f in artifacts/qemu-emulator-*/*.qcow2; do cp "$f" release/ done - # savevm.zst is optional — older branches may not produce it. Skip - # missing files rather than failing the publish. - for f in artifacts/qemu-emulator-*/*.savevm.zst; do - [ -f "$f" ] && cp "$f" release/ - done cat > release-notes.md </dev/null || true } -# Open a persistent QMP session on the monitor socket, negotiate capabilities, -# run a series of commands, and close. Commands are read from stdin (one JSON -# object per line); responses are written to stdout. 
Uses socat's bidirectional -# pipe so we can interleave request/response in one connection — QMP requires -# qmp_capabilities to come first and keeps state across commands. -# Keeps stdin open briefly after caller's input ends so QEMU has time to -# process the last command before socat closes. -qmp_session() { - local sock="$1" - local payload - payload="$(cat)" - ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t30 - "UNIX-CONNECT:${sock}" -} - -# Drive the snapshot capture over QMP: -# 1. qmp_capabilities — exit negotiation mode. -# 2. stop — pause the VM so no more disk writes happen. -# 3. migrate to exec:zstd > — streams RAM/device state out. -# 4. Poll query-migrate until status=completed (or failed). -# 5. quit — terminate QEMU cleanly. -capture_vm_state() { - local sock="$1" - local guest_path="$2" - - if [ ! -S "$sock" ]; then - err "QMP monitor socket missing: $sock" - return 1 - fi - - log " QMP: stopping VM..." - { - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"stop"}' - } | qmp_session "$sock" >/dev/null || { - err "QMP stop failed" - return 1 - } - - log " QMP: enabling mapped-ram + multifd for fast resume..." - # mapped-ram: writes each RAM page to a fixed offset in the output file - # (vs the legacy streamed format). This lets the target QEMU mmap the file - # and fault pages lazily — and combined with multifd, load RAM in parallel. - # multifd-channels=4 matches our pinned SMP so the channels don't starve - # each other on the target's 4 vCPUs. 
- local caps_cmd params_cmd - caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' - params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' - local setup_resp - setup_resp=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' "$caps_cmd" - printf '%s\n' "$params_cmd" - } | qmp_session "$sock") || { - err "QMP capabilities setup failed" - return 1 - } - if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then - err "QMP capabilities returned error: $setup_resp" - return 1 - fi - - log " QMP: migrating RAM state to ${guest_path}..." - # Use file: migration (native QEMU) instead of exec: to avoid relying on a - # spawned shell finding zstd in PATH. Compressed as a separate host step - # after migrate completes. - local migrate_cmd - migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") - local migrate_resp - migrate_resp=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' "$migrate_cmd" - } | qmp_session "$sock") || { - err "QMP migrate failed" - return 1 - } - if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then - err "QMP migrate returned error: $migrate_resp" - return 1 - fi - - # Poll migration status. Migration runs in the background after the - # migrate command returns; we watch for "completed" or "failed". 
- local migrate_timeout=600 - local waited=0 - local last_heartbeat=0 - while [ "$waited" -lt "$migrate_timeout" ]; do - local status_line status - status_line=$({ - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"query-migrate"}' - } | qmp_session "$sock" 2>/dev/null || true) - status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')" - case "$status" in - completed) - log " QMP: migrate completed (${waited}s)" - break - ;; - failed|cancelled) - err " QMP: migrate ended with status=$status" - err " QMP response: $status_line" - return 1 - ;; - active|setup|device|"") - # still running - if [ "$((waited - last_heartbeat))" -ge 30 ]; then - local transferred - transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/') - log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})" - last_heartbeat=$waited - fi - ;; - *) - log " QMP: migrate status=$status (${waited}s)" - ;; - esac - sleep 2 - waited=$((waited + 2)) - done - - if [ "$waited" -ge "$migrate_timeout" ]; then - err "QMP migrate timed out after ${migrate_timeout}s" - err "Last query-migrate response: $({ - printf '%s\n' '{\"execute\":\"qmp_capabilities\"}' - printf '%s\n' '{\"execute\":\"query-migrate\"}' - } | qmp_session "$sock" 2>/dev/null || true)" - return 1 - fi - - log " QMP: quitting VM..." - { - printf '%s\n' '{"execute":"qmp_capabilities"}' - printf '%s\n' '{"execute":"quit"}' - } | qmp_session "$sock" >/dev/null || true - - return 0 -} +# qmp_session() and capture_vm_state() live in common.sh; both build-image.sh +# (CI) and run-emulator.sh (stack emulator pull local capture) call them. 
build_one() { local arch="$1" diff --git a/docker/local-emulator/qemu/common.sh b/docker/local-emulator/qemu/common.sh index 1e3374dad4..38385e308b 100755 --- a/docker/local-emulator/qemu/common.sh +++ b/docker/local-emulator/qemu/common.sh @@ -68,3 +68,142 @@ make_iso_from_dir() { exit 1 fi } + +# Send one or more QMP commands over the monitor socket. Stdin is a stream of +# JSON objects; qmp_capabilities is always sent first to exit negotiation mode. +# Keep stdin open briefly after writing so socat doesn't close before QEMU +# responds — QMP replies in milliseconds so 0.5s is plenty. +# +# Callers: build-image.sh capture flow, run-emulator.sh cmd_capture. +qmp_session() { + local sock="$1" + local payload + payload="$(cat)" + ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t30 - "UNIX-CONNECT:${sock}" +} + +# Drive the snapshot capture over QMP: +# 1. qmp_capabilities — exit negotiation mode. +# 2. stop — pause the VM so no more disk writes happen. +# 3. migrate-set-capabilities — enable mapped-ram + multifd for fast resume. +# 4. migrate to file: — streams RAM/device state out. +# 5. Poll query-migrate until status=completed (or failed). +# 6. quit — terminate QEMU cleanly. +# +# Depends on log/err/warn being defined by the sourcing script. +capture_vm_state() { + local sock="$1" + local guest_path="$2" + + if [ ! -S "$sock" ]; then + err "QMP monitor socket missing: $sock" + return 1 + fi + + log " QMP: stopping VM..." + { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"stop"}' + } | qmp_session "$sock" >/dev/null || { + err "QMP stop failed" + return 1 + } + + log " QMP: enabling mapped-ram + multifd for fast resume..." + # mapped-ram: writes each RAM page to a fixed offset in the output file + # (vs the legacy streamed format). This lets the target QEMU mmap the file + # and fault pages lazily — and combined with multifd, load RAM in parallel. 
+ # multifd-channels=4 matches our pinned SMP so the channels don't starve + # each other on the target's 4 vCPUs. + local caps_cmd params_cmd + caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' + params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' + local setup_resp + setup_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$caps_cmd" + printf '%s\n' "$params_cmd" + } | qmp_session "$sock") || { + err "QMP capabilities setup failed" + return 1 + } + if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP capabilities returned error: $setup_resp" + return 1 + fi + + log " QMP: migrating RAM state to ${guest_path}..." + # Use file: migration (native QEMU) instead of exec: to avoid relying on a + # spawned shell finding zstd in PATH. Compressed as a separate host step + # after migrate completes. + local migrate_cmd + migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path") + local migrate_resp + migrate_resp=$({ + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' "$migrate_cmd" + } | qmp_session "$sock") || { + err "QMP migrate failed" + return 1 + } + if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then + err "QMP migrate returned error: $migrate_resp" + return 1 + fi + + # Poll migration status. Migration runs in the background after the + # migrate command returns; we watch for "completed" or "failed". 
+ local migrate_timeout=600
+ local waited=0
+ local last_heartbeat=0
+ while [ "$waited" -lt "$migrate_timeout" ]; do
+ local status_line status
+ status_line=$({
+ printf '%s\n' '{"execute":"qmp_capabilities"}'
+ printf '%s\n' '{"execute":"query-migrate"}'
+ } | qmp_session "$sock" 2>/dev/null || true)
+ status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')"
+ case "$status" in
+ completed)
+ log " QMP: migrate completed (${waited}s)"
+ break
+ ;;
+ failed|cancelled)
+ err " QMP: migrate ended with status=$status"
+ err " QMP response: $status_line"
+ return 1
+ ;;
+ active|setup|device|"")
+ # still running
+ if [ "$((waited - last_heartbeat))" -ge 30 ]; then
+ local transferred
+ transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/')
+ log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})"
+ last_heartbeat=$waited
+ fi
+ ;;
+ *)
+ log " QMP: migrate status=$status (${waited}s)"
+ ;;
+ esac
+ sleep 2
+ waited=$((waited + 2))
+ done
+
+ if [ "$waited" -ge "$migrate_timeout" ]; then
+ err "QMP migrate timed out after ${migrate_timeout}s"
+ err "Last query-migrate response: $({
+ printf '%s\n' '{"execute":"qmp_capabilities"}'
+ printf '%s\n' '{"execute":"query-migrate"}'
+ } | qmp_session "$sock" 2>/dev/null || true)"
+ return 1
+ fi
+
+ log " QMP: quitting VM..." 
+ { + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"quit"}' + } | qmp_session "$sock" >/dev/null || true + + return 0 +} diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 72f095cf57..aba9311b04 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -21,6 +21,13 @@ EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}" # in place — acceptable for tests and CI that don't reach the emulator over # a shared network. Shaves ~2-3s off `emulator start`. EMULATOR_NO_ROTATION="${EMULATOR_NO_ROTATION:-0}" +# Internal: set to 1 by cmd_capture to build QEMU with the snapshot-compatible +# device layout (phantom ISOs, no virtfs, pcie-root-port, pinned 4096MB/4CPU) +# without the `-incoming defer` that resume mode adds. The captured snapshot +# must be byte-compatible with what the resume path will later feed to QEMU. +EMULATOR_CAPTURING_SNAPSHOT="${EMULATOR_CAPTURING_SNAPSHOT:-0}" +# Force re-capture even if a .savevm.zst is already present. +EMULATOR_FORCE_CAPTURE="${EMULATOR_FORCE_CAPTURE:-0}" # Fixed host-side ports for the QEMU emulator (267xx range). # Only user-facing services are exposed; internal deps stay inside the VM. @@ -87,7 +94,15 @@ runtime_iso_path() { } snapshot_available() { - [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] + [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ "$EMULATOR_CAPTURING_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ] +} + +# True when QEMU must use the snapshot-compatible device layout — either to +# resume from an existing snapshot or to capture a new one. Resume adds +# `-incoming defer`; capture does not. Everything else (phantom ISOs, no +# virtfs, pcie-root-port, pinned RAM/SMP) matches. 
+snapshot_layout() { + snapshot_available || [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ] } # Ensure the decompressed mapped-ram cache is up-to-date with the shipped @@ -163,6 +178,16 @@ ensure_runtime_config_iso() { # emulator:start`) rather than through the stack-cli, which generates the # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume # label so the guest's render-stack-env mounts it the same way. + write_runtime_config_iso "$VM_DIR" +} + +# Write a STACKCFG runtime-config.iso containing runtime.env + base.env. +# The VM_DIR_HOST arg is the path to publish internal-pck / stack.log to on +# /host; pass empty string to suppress publication (used by capture mode +# where /host isn't mounted — virtfs is detached for snapshot compatibility, +# so any host-side write would fail and restart-loop stack.service). +write_runtime_config_iso() { + local vm_dir_host="$1" local base_env="$SCRIPT_DIR/../.env.development" if [ ! -f "$base_env" ]; then err "Cannot generate runtime config ISO: $base_env is missing." @@ -179,10 +204,10 @@ ensure_runtime_config_iso() { printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT" printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT" printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT" - printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR" + printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$vm_dir_host" } > "$cfg_dir/runtime.env" cp "$base_env" "$cfg_dir/base.env" - make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir" + make_iso_from_dir "$(runtime_iso_path)" "STACKCFG" "$cfg_dir" } service_is_up() { @@ -259,13 +284,14 @@ build_qemu_cmd() { local current_fp current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")" - if snapshot_available; then + if snapshot_layout; then # The savevm RAM state was captured against the base image's exact disk # state. 
An overlay with writes from a previous session diverges from # that point, so -incoming would resume RAM against inconsistent disk. # Always start from a fresh overlay in the snapshot path; per-session # state is not preserved. Users who want persistence can opt out with - # EMULATOR_NO_SNAPSHOT=1. + # EMULATOR_NO_SNAPSHOT=1. Capture mode also needs a clean overlay so the + # snapshot we write is taken against the base's known disk state. if [ -f "$VM_DIR/disk.qcow2" ]; then rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file" fi @@ -332,12 +358,16 @@ build_qemu_cmd() { # exist so the migration replay matches device IDs. Runtime-only devices # (virtfs, balloon) live at higher slots — extra at destination is fine. local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM" - if snapshot_available; then - log "Snapshot found at $savevm_file — fast-resume enabled." - # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. - # We use that to set mapped-ram + multifd capabilities before loading, - # which enables parallel RAM restore (~2-3x faster than streamed decode). - snapshot_args+=(-incoming defer) + if snapshot_layout; then + if snapshot_available; then + log "Snapshot found at $savevm_file — fast-resume enabled." + # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command. + # We use that to set mapped-ram + multifd capabilities before loading, + # which enables parallel RAM restore (~2-3x faster than streamed decode). + snapshot_args+=(-incoming defer) + else + log "Capture mode: booting with snapshot-compatible layout (no -incoming)." + fi snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}" # RAM size is baked into the snapshot; migration replay requires an # identical -m value. 
Pin to the build-time RAM (4096) and ignore @@ -374,7 +404,7 @@ build_qemu_cmd() { ) fi - if snapshot_available; then + if snapshot_layout; then QEMU_CMD=( "$qemu_bin" -machine "$machine" @@ -406,7 +436,7 @@ build_qemu_cmd() { # via human-monitor-command (errors come back as a return string, # not a QMP error). -fsdev "local,id=hostfs,path=/,security_model=none" - "${snapshot_args[@]}" + ${snapshot_args[@]+"${snapshot_args[@]}"} -serial "file:$VM_DIR/serial.log" -display none -daemonize @@ -842,6 +872,100 @@ cmd_reset() { log "Emulator state reset. Next start will be a fresh boot." } +# Cold-boot the VM with the snapshot-compatible device layout, wait for all +# services to be healthy, then capture a snapshot via QMP migrate and compress +# it to .savevm.zst. Called by `stack emulator pull` so first-run users get a +# fast-resume snapshot that's guaranteed compatible with their host's QEMU +# version + accelerator (which CI-built snapshots can't guarantee across +# KVM/HVF/TCG). +cmd_capture() { + if [ ! -f "$(image_path)" ]; then + err "Missing qcow2: $(image_path). Run 'stack emulator pull' first." + exit 1 + fi + if [ -s "$(savevm_path)" ] && [ "$EMULATOR_FORCE_CAPTURE" != "1" ]; then + log "Snapshot already present at $(savevm_path); skipping capture." + log "Pass EMULATOR_FORCE_CAPTURE=1 to rebuild it." + return 0 + fi + if is_running; then + err "Emulator is already running; stop it first (stack emulator stop)." + exit 1 + fi + + # Start with a clean slate if we're force-recapturing; stale raw/zst would + # otherwise make snapshot_available() return true and flip QEMU into + # -incoming defer mode. + rm -f "$(savevm_path)" "$(savevm_raw_path)" + + ensure_ports_free + mkdir -p "$RUN_DIR" "$VM_DIR" + # Regenerate runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty — + # virtfs is detached in capture mode, so run-stack-container's + # `install internal-pck → /host/$VM_DIR_HOST/...` would fail and restart-loop + # stack.service. 
Mirrors build-image.sh's CI runtime.env shape. + rm -f "$(runtime_iso_path)" + write_runtime_config_iso "" + + info "Cold-booting VM to capture local snapshot (one-time, ~1-3 min)..." + EMULATOR_CAPTURING_SNAPSHOT=1 + start_vm + info "VM: 4096MB / 4 CPUs (pinned for snapshot compatibility)" + + # Cold boot with snapshot-compatible layout drops virtfs, so stack.service + # starts without /host mounted — fine for capture; hostfs is hot-plugged on + # resume via qmp_hotplug_9p. + if ! wait_for_condition "all services" "$READY_TIMEOUT" all_ready; then + tail_vm_logs + stop_vm + err "Services did not come up; capture aborted." + exit 1 + fi + + local raw tmp_raw zst tmp_zst + raw="$(savevm_raw_path)" + tmp_raw="${raw}.capture.tmp" + zst="$(savevm_path)" + tmp_zst="${zst}.capture.tmp" + rm -f "$tmp_raw" "$tmp_zst" + + log "Capturing VM state via QMP (mapped-ram + multifd)..." + if ! capture_vm_state "$VM_DIR/monitor.sock" "$tmp_raw"; then + err "QMP capture failed." + stop_vm + exit 1 + fi + + # capture_vm_state sent QMP quit; wait for QEMU to exit, then clean sockets. + local waited=0 + while [ "$waited" -lt 30 ] && is_running; do + sleep 1 + waited=$((waited + 1)) + done + if is_running; then + warn "QEMU did not exit after QMP quit; forcing." + stop_vm + fi + rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" + + if [ ! -s "$tmp_raw" ]; then + err "Captured raw file is empty: $tmp_raw" + exit 1 + fi + + log "Compressing snapshot with zstd..." + zstd -1 -T0 -f -o "$tmp_zst" "$tmp_raw" + mv "$tmp_zst" "$zst" + # Keep the uncompressed file too — resume reads it directly via mapped-ram, + # and ensure_savevm_raw skips re-decompression when the raw's mtime >= zst's. 
+ mv "$tmp_raw" "$raw" + touch -r "$zst" "$raw" + + local size + size="$(du -h "$zst" | cut -f1)" + log "Snapshot captured: $zst (${size})" +} + STATUS_FAILED=0 print_service_status() { @@ -889,12 +1013,12 @@ ACTION="start" while [[ $# -gt 0 ]]; do case "$1" in - start|stop|reset|status|bench) + start|stop|reset|status|bench|capture) ACTION="$1" shift ;; *) - echo "Usage: $0 [start|stop|reset|status|bench]" + echo "Usage: $0 [start|stop|reset|status|bench|capture]" exit 1 ;; esac @@ -906,4 +1030,5 @@ case "$ACTION" in reset) cmd_reset ;; status) cmd_status ;; bench) cmd_bench ;; + capture) cmd_capture ;; esac diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index 942b436cd7..f9d6a6aae4 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -239,6 +239,9 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { if (!existsSync(img)) { console.log("No emulator image found. Pulling latest..."); await pullRelease(arch); + // Capture now so this and all subsequent starts resume fast. Skipping it + // would cold-boot today plus every future start (we never auto-capture). + await captureLocalSnapshot(arch); } prepareRuntimeConfigIso(); await runEmulator("start", { EMULATOR_ARCH: arch }); @@ -261,25 +264,26 @@ async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branc mkdirSync(imageDir, { recursive: true }); const diskAsset = `stack-emulator-${arch}.qcow2`; - // The savevm file enables the fast-resume path in run-emulator.sh. It's - // optional — older releases may not have it and the runtime cleanly falls - // back to a cold boot. - const snapshotAsset = `stack-emulator-${arch}.savevm.zst`; const release = await ghApi(`/repos/${repo}/releases/tags/${tag}`); const diskMatch = release.assets.find((a) => a.name === diskAsset); if (!diskMatch) { throw new CliError(`Asset ${diskAsset} not found in release ${tag}. 
Run 'stack emulator list-releases' to see available releases.`); } - const snapshotMatch = release.assets.find((a) => a.name === snapshotAsset); const token = githubToken(); - await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag); - if (snapshotMatch) { - await downloadReleaseAsset(snapshotMatch, imageDir, snapshotAsset, token, tag); - } else { - console.log(`Snapshot asset ${snapshotAsset} not available in release ${tag}; fast-start disabled for this image.`); - } +} + +// Cold-boot the VM, wait for services, capture a snapshot via QMP, compress, +// stop. Runs once per qcow2 download so subsequent `stack emulator start`s +// resume in ~3-8s. Snapshots are always captured on the user's own machine +// because QEMU migration state isn't portable across accelerators +// (KVM/HVF/TCG) or `-cpu max` feature sets. +async function captureLocalSnapshot(arch: "arm64" | "amd64"): Promise { + preflightForVmStart("pull", arch); + prepareRuntimeConfigIso(); + console.log("Capturing local snapshot (first-time, ~1-3 min cold boot + capture)..."); + await runEmulator("capture", { EMULATOR_ARCH: arch }); } async function downloadReleaseAsset( @@ -491,19 +495,20 @@ export function registerEmulatorCommand(program: Command) { emulator .command("pull") - .description("Download an emulator image from GitHub Releases or a PR build") + .description("Download an emulator image from GitHub Releases or a PR build, then capture a local fast-start snapshot") .option("--arch ", "Target architecture (default: current system arch)") .option("--branch ", "Release branch (default: dev)") .option("--tag ", "Specific release tag (default: latest)") .option("--repo ", "GitHub repository (default: stack-auth/stack-auth)") .option("--pr ", "Pull from a PR's CI artifacts") .option("--run ", "Pull from a specific workflow run's artifacts") - .action(async (opts) => { + .option("--skip-snapshot", "Download only the qcow2; skip the one-time local snapshot capture") + .action(async (opts: 
{ arch?: string, repo?: string, branch?: string, tag?: string, pr?: string, run?: string, skipSnapshot?: boolean }) => { const arch = resolveArch(opts.arch); const repo = opts.repo ?? DEFAULT_REPO; if (opts.run || opts.pr) { - let runId = opts.run as string | undefined; + let runId = opts.run; if (!runId) { console.log(`Finding latest successful build for PR #${opts.pr}...`); const pr = await ghApi(`/repos/${repo}/pulls/${opts.pr}`); @@ -521,22 +526,33 @@ export function registerEmulatorCommand(program: Command) { mkdirSync(imageDir, { recursive: true }); const dest = join(imageDir, `stack-emulator-${arch}.qcow2`); const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); + const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`); if (existsSync(dest)) unlinkSync(dest); + // Stale snapshots from a previous pull would resume against the new + // qcow2 and crash; wipe them so capture rebuilds cleanly. if (existsSync(snapshotDest)) unlinkSync(snapshotDest); + if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest); const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir); if (!downloaded) { throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`); } if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`); console.log(`Downloaded: ${dest}`); - if (existsSync(snapshotDest)) { - console.log(`Downloaded: ${snapshotDest}`); - } else { - console.log(`Snapshot not present in artifact for run ${runId}; fast-start disabled.`); - } } else { + // Same stale-snapshot concern as the PR branch above. 
+ const imageDir = emulatorImageDir(); + const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`); + const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`); + if (existsSync(snapshotDest)) unlinkSync(snapshotDest); + if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest); await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag }); } + + if (opts.skipSnapshot) { + console.log("--skip-snapshot: not capturing a local snapshot. First `stack emulator start` will cold-boot."); + } else { + await captureLocalSnapshot(arch); + } }); emulator From 7db9fe405e009d9708e8415d12b62cf2b5cdcb89 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 11:48:34 -0700 Subject: [PATCH 19/25] fix CI verify step: use freshly-built qcow2 via STACK_EMULATOR_HOME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack-cli ignores EMULATOR_IMAGE_DIR / EMULATOR_RUN_DIR and derives its own paths from STACK_EMULATOR_HOME (or $HOME/.stack/emulator as default). Without STACK_EMULATOR_HOME set, `emulator start` in CI was silently auto-pulling from the previous `emulator-dev-latest` release instead of using the just-built qcow2 + savevm under ${workspace}. Before: auto-pull got qcow2 + savevm.zst → fast-resume (worked, but tested the PREVIOUS build, not the current one). After my capture-locally change: auto-pull only gets qcow2 → captureLocalSnapshot kicks off a full cold-boot + capture of a stale image, which is slow and can hang in CI. Point STACK_EMULATOR_HOME at the workspace so the CLI finds the just-built images and goes straight to snapshot-resume. Update the smoke-test job's copy step to target STACK_EMULATOR_HOME so both paths stay consistent. 
--- .github/workflows/qemu-emulator-build.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index 380a8ab1fc..eaf3ee83e9 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -22,8 +22,14 @@ concurrency: env: EMULATOR_IMAGE_NAME: stack-local-emulator + # Shell scripts (build-image.sh, run-emulator.sh) read these directly. EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run + # The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths + # from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator + # start` finds the freshly-built qcow2 + savevm from build-image.sh + # (snapshot-resume, fast) instead of auto-pulling from a prior release. + STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu jobs: build: @@ -260,9 +266,9 @@ jobs: - name: Place qcow2 into STACK_EMULATOR_HOME layout run: | - mkdir -p "$HOME/.stack/emulator/images" - cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$HOME/.stack/emulator/images/" - ls -lh "$HOME/.stack/emulator/images/" + mkdir -p "$STACK_EMULATOR_HOME/images" + cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$STACK_EMULATOR_HOME/images/" + ls -lh "$STACK_EMULATOR_HOME/images/" # No savevm.zst artifact (users capture locally via `emulator pull`), # so `emulator start` cold-boots the qcow2. Budget accordingly. 
@@ -293,7 +299,7 @@ jobs: - name: Print serial log on failure if: failure() - run: tail -100 $HOME/.stack/emulator/run/vm/serial.log 2>/dev/null || true + run: tail -100 "$STACK_EMULATOR_HOME/run/vm/serial.log" 2>/dev/null || true publish: name: Publish to GitHub Releases From 510ef380157edf747e811a27fd64776fbe683651 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 12:31:53 -0700 Subject: [PATCH 20/25] fix PCI slot mismatch in snapshot capture + stale runtime ISO on direct start - build-image.sh: move runtime.iso drive before netdev so its virtio-blk slot precedes virtio-net-pci, matching run-emulator.sh's resume argv. Previously migrate-incoming against CI's savevm hit a device-tree mismatch and only looked green because snapshot_fallback_to_cold_boot silently retried as cold boot. - run-emulator.sh: drop early-return in ensure_runtime_config_iso so PORT_PREFIX/EMULATOR_*_PORT changes take effect on every start; the preserved ISO from a prior run would otherwise silently override the host-forward ports picked up by QEMU's netdev. - common.sh: fix backslash-escaped JSON in capture_vm_state's migrate- timeout diagnostic; single-quoted printf was emitting literal backslashes, so QMP replied with a parse error instead of the real query-migrate status. 
--- docker/local-emulator/qemu/build-image.sh | 13 ++++++++--- docker/local-emulator/qemu/common.sh | 4 ++-- docker/local-emulator/qemu/run-emulator.sh | 25 +++++++++------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 26f4765165..36f76d99f4 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -333,19 +333,25 @@ build_one() { local monitor_sock="$tmp_dir/monitor.sock" local qga_sock="$tmp_dir/qga.sock" local snapshot_args=() + local runtime_disk_args=() local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + # STACKCFG runtime ISO lets stack.service start during the build — same + # disk shape render-stack-env expects at runtime. Placed before netdev + # so its virtio-blk PCI slot precedes virtio-net-pci, matching the + # resume argv order in run-emulator.sh (slots must line up or + # migrate-incoming fails the device-tree check). + runtime_disk_args=( + -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" + ) # QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec # inside the guest post-resume (only needed at runtime but harmless here). - # STACKCFG runtime ISO lets stack.service start during the build — same - # disk shape render-stack-env expects at runtime. snapshot_args=( -chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off" -mon "chardev=monitor,mode=control" -chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0" -device virtio-serial -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0" - -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on" # Empty PCIe root port reserved for runtime hot-plug of virtio-9p. # The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug # only works through a root port. 
Must be present at snapshot capture @@ -367,6 +373,7 @@ build_one() { -drive "file=$tmp_img,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap" \ -drive "file=$seed_iso,format=raw,if=virtio,readonly=on" \ -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \ + ${runtime_disk_args[@]+"${runtime_disk_args[@]}"} \ -netdev user,id=net0 \ -device virtio-net-pci,netdev=net0 \ ${virtfs_args[@]+"${virtfs_args[@]}"} \ diff --git a/docker/local-emulator/qemu/common.sh b/docker/local-emulator/qemu/common.sh index 38385e308b..f5d3392d9d 100755 --- a/docker/local-emulator/qemu/common.sh +++ b/docker/local-emulator/qemu/common.sh @@ -193,8 +193,8 @@ capture_vm_state() { if [ "$waited" -ge "$migrate_timeout" ]; then err "QMP migrate timed out after ${migrate_timeout}s" err "Last query-migrate response: $({ - printf '%s\n' '{\"execute\":\"qmp_capabilities\"}' - printf '%s\n' '{\"execute\":\"query-migrate\"}' + printf '%s\n' '{"execute":"qmp_capabilities"}' + printf '%s\n' '{"execute":"query-migrate"}' } | qmp_session "$sock" 2>/dev/null || true)" return 1 fi diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index aba9311b04..0845ff1539 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -168,16 +168,12 @@ runtime_fingerprint() { } ensure_runtime_config_iso() { - local cfg_iso - cfg_iso="$(runtime_iso_path)" - if [ -s "$cfg_iso" ]; then - return 0 - fi - - # Fallback used when this script is invoked directly (e.g. `pnpm - # emulator:start`) rather than through the stack-cli, which generates the - # ISO via packages/stack-cli/src/lib/iso.ts. Mirrors the field set + volume - # label so the guest's render-stack-env mounts it the same way. + # Regenerate unconditionally: port env vars (PORT_PREFIX, EMULATOR_*_PORT) + # may have changed since the last run, and an ISO cached from a prior + # invocation would silently override them. 
The stack-cli path writes the + # ISO first via packages/stack-cli/src/lib/iso.ts; this re-write produces + # the same content for that flow (same field set + volume label) and is + # cheap enough (~ms) to run on every start. write_runtime_config_iso "$VM_DIR" } @@ -740,10 +736,9 @@ stop_vm() { fi fi rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log" - # Do NOT remove runtime-config.iso: the CLI owns its lifecycle and run-emulator.sh - # cannot regenerate it. Removing here breaks the snapshot → cold-boot fallback - # (which calls stop_vm before recursing into cmd_start → ensure_runtime_config_iso). - # `cmd_reset` wipes $RUN_DIR entirely when a full reset is wanted. + # runtime-config.iso is left in place; ensure_runtime_config_iso regenerates + # it on the next start. `cmd_reset` wipes $RUN_DIR entirely when a full reset + # is wanted. } cmd_start() { @@ -854,7 +849,7 @@ snapshot_fallback_to_cold_boot() { warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..." stop_vm # Wipe the overlay + fingerprint so build_qemu_cmd re-creates a fresh one. - # runtime-config.iso is preserved by stop_vm (the CLI owns it). + # runtime-config.iso is regenerated by ensure_runtime_config_iso on recursion. rm -f "$VM_DIR/disk.qcow2" "$VM_DIR/base-image.fingerprint" \ "$VM_DIR/seed.phantom" "$VM_DIR/bundle.phantom" EMULATOR_NO_SNAPSHOT=1 From 39b5c083e704291a950678ccb91a796a06c5357c Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 13:56:33 -0700 Subject: [PATCH 21/25] fix smoke test: skip shell ISO regen when CLI already wrote it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensure_runtime_config_iso unconditionally fell through to make_iso_from_dir, which still required hdiutil/mkisofs/genisoimage — the host dep the lib/iso.ts TS writer was supposed to remove. The Smoke Test job doesn't install genisoimage, so emulator start failed. 
CLI now sets STACK_EMULATOR_CLI_WROTE_ISO=1 and the shell short-circuits when that flag plus a non-empty ISO are present. --- docker/local-emulator/qemu/run-emulator.sh | 18 ++++++++++++------ packages/stack-cli/src/commands/emulator.ts | 6 +++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 0845ff1539..088d08a9f0 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -168,12 +168,18 @@ runtime_fingerprint() { } ensure_runtime_config_iso() { - # Regenerate unconditionally: port env vars (PORT_PREFIX, EMULATOR_*_PORT) - # may have changed since the last run, and an ISO cached from a prior - # invocation would silently override them. The stack-cli path writes the - # ISO first via packages/stack-cli/src/lib/iso.ts; this re-write produces - # the same content for that flow (same field set + volume label) and is - # cheap enough (~ms) to run on every start. + # When invoked via stack-cli, the CLI writes the runtime ISO natively + # (packages/stack-cli/src/lib/iso.ts) immediately before spawning us and + # sets STACK_EMULATOR_CLI_WROTE_ISO=1. Trust it and skip regeneration — + # otherwise we'd fall through to make_iso_from_dir and require + # hdiutil/mkisofs/genisoimage, which is exactly the host dep the CLI path + # is designed to remove. + if [ "$STACK_EMULATOR_CLI_WROTE_ISO" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + return 0 + fi + # Direct-shell invocation path: regenerate unconditionally. Port env vars + # (PORT_PREFIX, EMULATOR_*_PORT) may have changed since the last run, and + # an ISO cached from a prior invocation would silently override them. 
write_runtime_config_iso "$VM_DIR" } diff --git a/packages/stack-cli/src/commands/emulator.ts b/packages/stack-cli/src/commands/emulator.ts index f9d6a6aae4..3a67d26c07 100644 --- a/packages/stack-cli/src/commands/emulator.ts +++ b/packages/stack-cli/src/commands/emulator.ts @@ -244,7 +244,11 @@ async function startEmulator(arch: "arm64" | "amd64"): Promise { await captureLocalSnapshot(arch); } prepareRuntimeConfigIso(); - await runEmulator("start", { EMULATOR_ARCH: arch }); + // Signal to run-emulator.sh that runtime-config.iso was written by the CLI + // via lib/iso.ts; the shell's ensure_runtime_config_iso should trust it and + // skip its own regeneration (which would otherwise require the + // hdiutil/mkisofs/genisoimage host dep the TS writer replaces). + await runEmulator("start", { EMULATOR_ARCH: arch, STACK_EMULATOR_CLI_WROTE_ISO: "1" }); } export function resolveArch(raw?: string): "arm64" | "amd64" { From 7acb3ed20b541b6ff0d66e702866a9dfc9405421 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Thu, 16 Apr 2026 15:30:46 -0700 Subject: [PATCH 22/25] fix capture path: guard against set -u + preserve cmd_capture's empty-host ISO Two bugs surfaced by end-to-end testing against a freshly-built qcow2: 1. $STACK_EMULATOR_CLI_WROTE_ISO was referenced unguarded under `set -u`, so any code path that didn't set it (capture, direct-shell) tripped `unbound variable` before reaching the early-return. Use :- default. 2. ensure_runtime_config_iso was overwriting cmd_capture's specialized empty-VM_DIR_HOST ISO with the host-dir variant. Since virtfs is detached in capture mode, run-stack-container then tried to publish internal-pck to /host/... and restart-looped stack.service, so no service ever became healthy and capture aborted after 240s. Previously masked by snapshot_fallback_to_cold_boot; 510ef3801 fixed the fallback mask and exposed this. Skip regen when EMULATOR_CAPTURING_SNAPSHOT=1. 
--- docker/local-emulator/qemu/run-emulator.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docker/local-emulator/qemu/run-emulator.sh b/docker/local-emulator/qemu/run-emulator.sh index 088d08a9f0..7b52436e19 100755 --- a/docker/local-emulator/qemu/run-emulator.sh +++ b/docker/local-emulator/qemu/run-emulator.sh @@ -174,7 +174,15 @@ ensure_runtime_config_iso() { # otherwise we'd fall through to make_iso_from_dir and require # hdiutil/mkisofs/genisoimage, which is exactly the host dep the CLI path # is designed to remove. - if [ "$STACK_EMULATOR_CLI_WROTE_ISO" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + if [ "${STACK_EMULATOR_CLI_WROTE_ISO:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then + return 0 + fi + # In capture mode, cmd_capture already wrote a specialized ISO with an + # empty STACK_EMULATOR_VM_DIR_HOST — required because virtfs is detached + # for snapshot compatibility, and run-stack-container would otherwise + # try to publish internal-pck to /host/... and restart-loop + # stack.service. Trust that write and don't overwrite it. + if [ "${EMULATOR_CAPTURING_SNAPSHOT:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then return 0 fi # Direct-shell invocation path: regenerate unconditionally. Port env vars From 8f9b9c1046ced8b24fbee9880329c5e1a7621db0 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 10:40:42 -0700 Subject: [PATCH 23/25] emulator build: split snapshot-bake from savevm capture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits EMULATOR_BUILD_SNAPSHOT into two independent flags: * EMULATOR_BUILD_SNAPSHOT (default 1) — bake placeholder PCK/SSK/SAK/ CRON_SECRET into the qcow2 so runtime rotate-secrets can swap them per install. Cheap; no extra wall time. * EMULATOR_CAPTURE_SAVEVM (default 0) — start the stack, wait for backend+dashboard health, then capture savevm.zst via QMP. Implies BUILD_SNAPSHOT. 
CI never captures (snapshots aren't portable across KVM/HVF/TCG; users capture locally on first `stack emulator pull`). The previous default of capturing in CI was wasted work on amd64 and made arm64 fail — wait-for-stack-ready couldn't bring the stack up under cross-arch TCG inside its 600s budget, so cloud-final.service was marked failed. --- .github/workflows/qemu-emulator-build.yaml | 6 +- docker/local-emulator/qemu/build-image.sh | 94 +++++++++++-------- .../qemu/cloud-init/emulator/user-data | 11 ++- 3 files changed, 69 insertions(+), 42 deletions(-) diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index eaf3ee83e9..5792b7f600 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -27,8 +27,10 @@ env: EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run # The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths # from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator - # start` finds the freshly-built qcow2 + savevm from build-image.sh - # (snapshot-resume, fast) instead of auto-pulling from a prior release. + # start` finds the freshly-built qcow2 from build-image.sh and cold-boots + # it, instead of auto-pulling from a prior release. CI doesn't capture a + # savevm (EMULATOR_CAPTURE_SAVEVM defaults to 0); users capture locally + # on first `stack emulator pull`. STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu jobs: diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 36f76d99f4..741d60d029 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -12,22 +12,34 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" DEBIAN_VERSION="${DEBIAN_VERSION:-13}" DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}" RAM="${EMULATOR_BUILD_RAM:-4096}" -# Snapshot mode pins SMP to a fixed value so the runtime QEMU command (which -# uses EMULATOR_CPUS, default 4) can match the source device topology — RAM -# migration replay requires identical vCPU count. -if [ "${EMULATOR_BUILD_SNAPSHOT:-1}" = "1" ]; then +PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" +EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" +# Snapshot-ready qcow2: bake deterministic placeholder secrets (PCK/SSK/SAK/ +# CRON_SECRET) into the image so runtime `rotate-secrets` can swap them for +# fresh per-install values on every `emulator start`. Without this, the image +# would ship with random shared secrets — a security regression. Cheap to +# build (no extra wall-clock cost in CI), so it stays on by default. +EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" +# Capture RAM/device state via QMP at build time, producing a +# `stack-emulator-.savevm.zst` next to the qcow2. Off by default — +# users capture locally on first `stack emulator pull` (run-emulator.sh +# capture) because migration state isn't portable across accelerators +# (KVM/HVF/TCG) or `-cpu max` feature sets, so a CI-captured snapshot +# couldn't resume reliably on arbitrary user hardware. Implies +# EMULATOR_BUILD_SNAPSHOT=1. +EMULATOR_CAPTURE_SAVEVM="${EMULATOR_CAPTURE_SAVEVM:-0}" +if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ] && [ "$EMULATOR_BUILD_SNAPSHOT" != "1" ]; then + echo "EMULATOR_CAPTURE_SAVEVM=1 requires EMULATOR_BUILD_SNAPSHOT=1" >&2 + exit 1 +fi +# Capture mode pins SMP to a fixed value so the resume QEMU command (which +# uses EMULATOR_CPUS, default 4) can match the captured device topology — +# RAM migration replay requires identical vCPU count. 
+if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then CPUS="${EMULATOR_BUILD_CPUS:-4}" else CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}" fi -PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}" -EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}" -# Snapshot build mode: bring the VM to a fully-warm state (backend + dashboard -# responding), then capture RAM/device state via QMP so that `emulator start` -# can -incoming from it and return in ~3-8s. Enabled by default; set -# EMULATOR_BUILD_SNAPSHOT=0 to fall back to the legacy "shutdown after -# provisioning" flow. -EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}" RED='\033[0;31m' GREEN='\033[0;32m' @@ -67,7 +79,7 @@ check_deps() { command -v docker >/dev/null 2>&1 || missing+=("docker") fi - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then for cmd in socat zstd; do command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd") done @@ -297,33 +309,41 @@ build_one() { cp "$bundle_tgz" "$bundle_dir/img.tgz" cp "$BUILD_ENV_FILE" "$bundle_dir/build.env" if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then - # Guest reads this flag to use placeholder secrets and to wait at the end - # of provision-build for the host to snapshot the RAM state. + # Guest reads this flag to use deterministic placeholder secrets so that + # runtime rotate-secrets can swap them out per-install. printf 'STACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env" fi + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then + # Guest reads this flag to start stack.service during provision-build, + # wait for backend+dashboard health, then block forever waiting for the + # host to capture VM state via QMP (stop + migrate + quit). + printf 'STACK_EMULATOR_CAPTURE_SAVEVM=1\n' >> "$bundle_dir/build.env" + fi # Tell the guest which arch it's being built for so cross-arch (TCG) builds # can skip the smoke test, which isn't reliable under software emulation. 
printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env" make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir" - # render-stack-env (inside the guest) mounts a STACKCFG disk containing - # runtime.env + base.env. At runtime the host-side run-emulator.sh builds - # this ISO; at build time stack.service also starts the container, so we - # must provide the same shape here. Values mirror the defaults the runtime - # would supply — port-prefix 81 and matching host-port numbers (unused at - # build time since nothing is port-forwarded, but render-stack-env embeds - # them into /run/stack-auth/local-emulator.env). - mkdir -p "$runtime_cfg_dir" - { - printf 'STACK_EMULATOR_PORT_PREFIX=81\n' - printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' - printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' - printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' - printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' - printf 'STACK_EMULATOR_VM_DIR_HOST=\n' - } > "$runtime_cfg_dir/runtime.env" - cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" - make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then + # render-stack-env (inside the guest) mounts a STACKCFG disk containing + # runtime.env + base.env. At runtime the host-side run-emulator.sh builds + # this ISO; in capture mode stack.service also starts during the build, + # so we must provide the same shape here. Values mirror the defaults the + # runtime would supply — port-prefix 81 and matching host-port numbers + # (unused at build time since nothing is port-forwarded, but + # render-stack-env embeds them into /run/stack-auth/local-emulator.env). 
+ mkdir -p "$runtime_cfg_dir" + { + printf 'STACK_EMULATOR_PORT_PREFIX=81\n' + printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n' + printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n' + printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n' + printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n' + printf 'STACK_EMULATOR_VM_DIR_HOST=\n' + } > "$runtime_cfg_dir/runtime.env" + cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env" + make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir" + fi : > "$serial_log" : > "$provision_log" @@ -335,7 +355,7 @@ build_one() { local snapshot_args=() local runtime_disk_args=() local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none") - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then # STACKCFG runtime ISO lets stack.service start during the build — same # disk shape render-stack-env expects at runtime. Placed before netdev # so its virtio-blk PCI slot precedes virtio-net-pci, matching the @@ -360,7 +380,7 @@ build_one() { ) # QEMU disallows migration when virtfs is mounted in the guest — virtfs # has guest-side state (open handles, mount table) that isn't migratable. - # Drop the host fs mount in snapshot mode; STACK_SERVICES_READY still + # Drop the host fs mount in capture mode; STACK_SERVICES_READY still # arrives on the serial log so contains_provision_marker can detect it. 
virtfs_args=() fi @@ -385,7 +405,7 @@ build_one() { pid="$(cat "$pidfile")" local ready_marker="STACK_CLOUD_INIT_DONE" - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then ready_marker="STACK_SERVICES_READY" fi elapsed=0 @@ -450,7 +470,7 @@ build_one() { exit 1 fi - if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then + if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst" local savevm_raw="$tmp_dir/state.raw" local savevm_tmp="$tmp_dir/state.zst" diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index b3c21527b4..6ec0976192 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -734,13 +734,18 @@ write_files: log_provision "starting slim-docker-image" bash /usr/local/bin/slim-docker-image - # Snapshot-build mode: bring the stack container up, wait for full + # Capture mode: bring the stack container up, wait for full # readiness, emit STACK_SERVICES_READY, then wait indefinitely for the # host build script to capture VM state over QMP (stop + migrate + quit). # The VM never shuts itself down in this path — the host tears it down # once the savevm file has been written. - if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then - log_provision "snapshot-build mode: starting stack.service" + # + # CI never sets STACK_EMULATOR_CAPTURE_SAVEVM=1 (snapshots aren't + # portable across accelerators, so they're captured locally on first + # `stack emulator pull`). This branch only fires for opt-in local + # builds run with EMULATOR_CAPTURE_SAVEVM=1. 
+ if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_CAPTURE_SAVEVM=1' /etc/stack-build.env 2>/dev/null; then + log_provision "capture mode: starting stack.service" systemctl start stack.service || true log_provision "waiting for backend + dashboard to be ready" From fbd32073005d01b9433591ba63b4c44706b82700 Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 11:17:33 -0700 Subject: [PATCH 24/25] seed: bump session activity events tx timeout to 30s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prisma's default interactive-transaction timeout is 5s, but under cross-arch arm64 TCG in the emulator qcow2 build this single batch (deleteMany + createMany for events + ipInfos) takes ~10s. Bump just this call to 30s. Production (KVM/native) runs it in <1s, so the looser bound only engages when the DB is genuinely slow. Per-call option — no other transaction is affected. --- apps/backend/src/lib/seed-dummy-data.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/backend/src/lib/seed-dummy-data.ts b/apps/backend/src/lib/seed-dummy-data.ts index 346724680f..c784d6e439 100644 --- a/apps/backend/src/lib/seed-dummy-data.ts +++ b/apps/backend/src/lib/seed-dummy-data.ts @@ -1485,6 +1485,12 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO await tx.event.createMany({ data: events, }); + }, { + // Under cross-arch arm64 TCG in the emulator qcow2 build, this batch + // takes ~10s; Prisma's default is 5s. Production (KVM/native) runs it + // in well under 1s, so the looser bound only kicks in when the DB is + // genuinely slow. 
+ timeout: 30_000, }); if (clickhouseClient && clickhouseRows.length > 0) { From c8630c6e6b06b540411fa9c5baf87548c40881bf Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Mon, 20 Apr 2026 11:54:30 -0700 Subject: [PATCH 25/25] =?UTF-8?q?emulator:=20bump=20Postgres=20statement?= =?UTF-8?q?=5Ftimeout=2030s=20=E2=86=92=20120s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runBulldozerPaymentsInit / paginatedIngress issues a single $executeRaw per row that writes a JSONB payload. Under cross-arch arm64 TCG in the qcow2 build it takes ~31s per row and Postgres kills it with code 57014 (canceling statement due to statement timeout). 120s covers the observed time with a ~4× safety margin. --- docker/local-emulator/supervisord.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/local-emulator/supervisord.conf b/docker/local-emulator/supervisord.conf index 6ceffee211..a9d3d51da8 100644 --- a/docker/local-emulator/supervisord.conf +++ b/docker/local-emulator/supervisord.conf @@ -25,7 +25,7 @@ command=/usr/lib/postgresql/16/bin/postgres -c max_connections=500 -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all - -c statement_timeout=30s + -c statement_timeout=120s user=postgres autostart=true autorestart=true