Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions mise-tasks/dev
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,32 @@ init_pidfile
# without its trap firing. See `spawn_cleanup_guardian` in lib/dev-common.sh.
spawn_cleanup_guardian $$

cleanup() {
trap - EXIT INT TERM HUP
kill_from_pidfile
sweep_orphaned_services
echo "[dev] All dev-stack processes stopped."
# Fast shutdown helper: SIGTERM every recorded pgroup and return. The slow
# TERM→sleep→KILL escalation and the sweep_orphaned_services pass run in the
# background under the cleanup guardian (spawned at script start), which
# polls this script's PID and finishes the job after we exit. Doing the
# slow work in the foreground would exceed mise's per-task SIGTERM→SIGKILL
# grace period, so mise would SIGKILL us and observe WIFSIGNALED instead of
# WIFEXITED — surfacing as
# `ERROR … exited with non-zero status: no exit status` even though the
# eventual cleanup is correct.
fast_shutdown_kick() {
  # Best-effort: with no pidfile there is nothing recorded to signal.
  [ -f "$PIDFILE" ] || return 0
  # Each pidfile line is `label=pid`. The `|| [ -n ... ]` clause keeps a final
  # line that lacks a trailing newline from being silently skipped.
  while IFS='=' read -r _fc_label _fc_pid || [ -n "$_fc_label$_fc_pid" ]; do
    # Only signal plain decimal PIDs greater than 1. `kill -- -0` targets our
    # own process group and `kill -- -1` targets every process we are allowed
    # to signal, so a blank, corrupt, or truncated entry must never reach kill.
    case "$_fc_pid" in
      '' | *[!0-9]*) continue ;;
    esac
    [ "$_fc_pid" -gt 1 ] 2>/dev/null || continue
    # Negative PID == signal the whole process group. Groups that are already
    # gone are not an error during shutdown.
    kill -TERM -- "-$_fc_pid" 2>/dev/null || true
  done < "$PIDFILE"
}
# HUP is required: when the user hits Ctrl-C in a terminal running `mise run
# dev`, mise often exits without forwarding SIGINT to this bash script, and
# bash then receives SIGHUP from the dying parent. Bash's default action on
# an untrapped SIGHUP is to terminate *without* running the EXIT trap, so
# without `HUP` here the cleanup never fires and the entire subtree leaks.
trap cleanup EXIT INT TERM HUP
# EXIT trap fires on every exit path (normal, signal, error). Kick TERMs so
# the guardian can finish cleanup even when the INT/TERM/HUP trap below
# never fires — e.g. with `set -m`, Ctrl-C in the controlling terminal is
# delivered to SAT_PID's pgroup rather than this bash, so this shell only
# notices via the `wait` returning a signal-induced code (handled at the
# bottom of the script).
trap fast_shutdown_kick EXIT
# Direct signal path (Ctrl-C reaches this bash, or parent SIGTERM/SIGHUP):
# kick the same fast cleanup and exit 0 so mise records WIFEXITED(0).
trap 'fast_shutdown_kick; trap - EXIT INT TERM HUP; echo "[dev] Shutdown initiated; background cleanup will complete shortly."; exit 0' INT TERM HUP

WAIT_ON_TIMEOUT=7200000 NODE_NO_WARNINGS=1 start-server-and-test \
'run-p -ln start:pg start:matrix start:smtp start:prerender-dev start:prerender-manager-dev start:worker-development start:development' \
Expand All @@ -60,4 +74,15 @@ WAIT_ON_TIMEOUT=7200000 NODE_NO_WARNINGS=1 start-server-and-test \
SAT_PID=$!
record_dev_pid sat "$SAT_PID"
wait "$SAT_PID"
exit $?
RC=$?
# `set -m` puts SAT_PID in its own pgroup, so Ctrl-C in the controlling
# terminal is delivered to SAT's pgroup — not to this bash — and `wait`
# returns a 128+signal exit code without the INT/TERM/HUP trap firing.
# Normalize only the shutdown signals this script itself traps — HUP (129),
# INT (130), TERM (143) — to 0 so mise records WIFEXITED(0) instead of
# treating the expected shutdown as a task failure. Any other 128+signal
# status (e.g. 137 for SIGKILL, 139 for SIGSEGV) is not an expected shutdown
# and is propagated unchanged so a crashed dev stack is still reported.
# The EXIT trap still runs fast_shutdown_kick, and the cleanup guardian
# completes the KILL escalation + sweep after we exit.
case "$RC" in
  129 | 130 | 143) RC=0 ;;
esac
exit "$RC"
49 changes: 37 additions & 12 deletions mise-tasks/dev-all
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,32 @@ spawn_cleanup_guardian $$
pnpm --filter @cardstack/host start &
HOST_PID=$!
record_dev_pid host "$HOST_PID"
cleanup() {
trap - EXIT INT TERM HUP
kill_from_pidfile
sweep_orphaned_services
echo "[dev-all] All dev-stack processes stopped."
# Fast shutdown helper: SIGTERM every recorded pgroup and return. The slow
# TERM→sleep→KILL escalation and the sweep_orphaned_services pass run in the
# background under the cleanup guardian (spawned at script start), which
# polls this script's PID and finishes the job after we exit. Doing the
# slow work in the foreground here would exceed mise's per-task SIGTERM→
# SIGKILL grace period, so mise would SIGKILL us and observe WIFSIGNALED
# instead of WIFEXITED — surfacing as
# `ERROR … exited with non-zero status: no exit status` even though the
# eventual cleanup is correct.
fast_shutdown_kick() {
  # Best-effort: with no pidfile there is nothing recorded to signal.
  [ -f "$PIDFILE" ] || return 0
  # Each pidfile line is `label=pid`. The `|| [ -n ... ]` clause keeps a final
  # line that lacks a trailing newline from being silently skipped.
  while IFS='=' read -r _fc_label _fc_pid || [ -n "$_fc_label$_fc_pid" ]; do
    # Only signal plain decimal PIDs greater than 1. `kill -- -0` targets our
    # own process group and `kill -- -1` targets every process we are allowed
    # to signal, so a blank, corrupt, or truncated entry must never reach kill.
    case "$_fc_pid" in
      '' | *[!0-9]*) continue ;;
    esac
    [ "$_fc_pid" -gt 1 ] 2>/dev/null || continue
    # Negative PID == signal the whole process group. Groups that are already
    # gone are not an error during shutdown.
    kill -TERM -- "-$_fc_pid" 2>/dev/null || true
  done < "$PIDFILE"
}
# HUP is required: when the user hits Ctrl-C in a terminal running `mise run
# dev-all`, mise often exits without forwarding SIGINT to this bash script,
# and bash then receives SIGHUP from the dying parent. Bash's default action
# on an untrapped SIGHUP is to terminate *without* running the EXIT trap, so
# without `HUP` here the cleanup never fires and the entire subtree leaks.
trap cleanup EXIT INT TERM HUP
# EXIT trap fires on every exit path (normal, signal, error). Kick TERMs so
# the guardian can finish cleanup even when the INT/TERM/HUP trap below
# never fires — e.g. with `set -m`, Ctrl-C in the controlling terminal is
# delivered to SAT_PID's pgroup rather than this bash, so this shell only
# notices via the `wait` returning a signal-induced code (handled at the
# bottom of the script).
trap fast_shutdown_kick EXIT
# Direct signal path (Ctrl-C reaches this bash, or parent SIGTERM/SIGHUP):
# kick the same fast cleanup and exit 0 so mise records WIFEXITED(0).
trap 'fast_shutdown_kick; trap - EXIT INT TERM HUP; echo "[dev-all] Shutdown initiated; background cleanup will complete shortly."; exit 0' INT TERM HUP

HOST_TIMEOUT=120
ELAPSED=0
Expand Down Expand Up @@ -94,4 +108,15 @@ WAIT_ON_TIMEOUT=7200000 NODE_NO_WARNINGS=1 start-server-and-test \
SAT_PID=$!
record_dev_pid sat "$SAT_PID"
wait "$SAT_PID"
exit $?
RC=$?
# `set -m` puts SAT_PID in its own pgroup, so Ctrl-C in the controlling
# terminal is delivered to SAT's pgroup — not to this bash — and `wait`
# returns a 128+signal exit code without the INT/TERM/HUP trap firing.
# Normalize only the shutdown signals this script itself traps — HUP (129),
# INT (130), TERM (143) — to 0 so mise records WIFEXITED(0) instead of
# treating the expected shutdown as a task failure. Any other 128+signal
# status (e.g. 137 for SIGKILL, 139 for SIGSEGV) is not an expected shutdown
# and is propagated unchanged so a crashed dev stack is still reported.
# The EXIT trap still runs fast_shutdown_kick, and the cleanup guardian
# completes the KILL escalation + sweep after we exit.
case "$RC" in
  129 | 130 | 143) RC=0 ;;
esac
exit "$RC"
25 changes: 18 additions & 7 deletions mise-tasks/lib/dev-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,23 @@ _kill_tree_walk() {
# (4201/4202, 4210/4211, 4221/4222). Wrappers that just invoke ts-node
# don't `exec` it, so killing the wrapper alone leaves the ts-node
# grandchild reparented to init with its port still bound.
# - packages/host/scripts/vite-serve.js — the host start wrapper that
# spawns the actual vite child. (We don't separately sweep `pnpm
# --filter @cardstack/host start`: its only child IS vite-serve.js, so
# - scripts/vite-serve.js — the host start wrapper that spawns the
# actual vite child. Can't anchor to $REPO_ROOT because pnpm invokes
# it as `node scripts/vite-serve.js` (relative argv, cwd-relative),
# so the absolute path never appears in argv for pkill -f to match.
# The filename is unique to Boxel's host package, so the relative
# pattern is safe from cross-tool collisions (only a sibling Boxel
# checkout running dev concurrently could collide, which already
# requires BOXEL_DEV_ALL_PIDFILE isolation). Killing this wrapper
# also frees the same-port redirect dispatcher it owns on 4200 in
# local-HTTPS dev mode. (We don't separately sweep `pnpm --filter
# @cardstack/host start`: its only child IS vite-serve.js, so
# killing the anchored child causes pnpm to exit on its own.)
# - packages/host/.*vite/bin/vite.js --port 4200 — the host dev server
# (port 4200) spawned by vite-serve.js
# - packages/host/.*vite/bin/vite.js — the host vite process. In
# plain-HTTP mode it binds the public port (4200) directly; in
# local-HTTPS mode the wrapper puts it on a dynamic internal port
# and the dispatcher fronts 4200. Don't pin the pattern to a specific
# `--port` value or the dynamic-port case escapes the sweep.
# - node_modules/.*/start-server-and-test/src/bin/start.js — the
# phase-coordinator that owns the run-p subtree
# - node_modules/.*/npm-run-all/bin/run-p — run-p, which spawns the
Expand All @@ -221,8 +232,8 @@ _kill_tree_walk() {
sweep_orphaned_services() {
REPO_ROOT_RE="$(printf '%s' "$REPO_ROOT" | sed -E 's/[][\\.*^$+?(){}|]/\\&/g')"
TSNODE_RE="${REPO_ROOT_RE}/packages/realm-server/node_modules.*--transpileOnly (worker|main|prerender)"
VITE_SERVE_RE="${REPO_ROOT_RE}/packages/host/scripts/vite-serve\.js"
VITE_BIN_RE="${REPO_ROOT_RE}/packages/host/.*vite/bin/vite\.js --port 4200"
VITE_SERVE_RE="scripts/vite-serve\.js"
VITE_BIN_RE="${REPO_ROOT_RE}/packages/host/.*vite/bin/vite\.js"
SAT_RE="${REPO_ROOT_RE}/.*node_modules/.*start-server-and-test/src/bin/start\.js"
RUNP_RE="${REPO_ROOT_RE}/.*node_modules/.*npm-run-all/bin/run-p"
HTTP_SERVER_RE="http-server.*X-Boxel-Assume-User.*--port 4206"
Expand Down
5 changes: 5 additions & 0 deletions mise-tasks/services/prerender
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
# rejects it as "Illegal option", so this script must run under bash.
set -o pipefail

# Treat SIGTERM/SIGINT as a clean shutdown so mise / run-p don't report
# 143-on-Ctrl-C as a task failure. See mise-tasks/services/worker for the
# rationale.
trap 'exit 0' INT TERM

# Tee stdout+stderr to a per-service log file so the local Alloy scraper
# (packages/observability/alloy/config.alloy) can pick this up via
# loki.source.file. Alloy's Docker discovery doesn't see native processes.
Expand Down
5 changes: 5 additions & 0 deletions mise-tasks/services/prerender-mgr
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
# rejects it as "Illegal option", so this script must run under bash.
set -o pipefail

# Treat SIGTERM/SIGINT as a clean shutdown so mise / run-p don't report
# 143-on-Ctrl-C as a task failure. See mise-tasks/services/worker for the
# rationale.
trap 'exit 0' INT TERM

# Tee stdout+stderr to a per-service log file so the local Alloy scraper
# (packages/observability/alloy/config.alloy) can pick this up via
# loki.source.file. Alloy's Docker discovery doesn't see native processes.
Expand Down
10 changes: 9 additions & 1 deletion mise-tasks/services/realm-server
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@ cleanup_icons_server() {
kill "$ICONS_PID" >/dev/null 2>&1 || true
fi
}
trap cleanup_icons_server EXIT INT TERM
# EXIT runs cleanup unconditionally. INT/TERM additionally exit 0 so that the
# 143-on-Ctrl-C from the ts-node pipeline below isn't reported by mise /
# run-p as "task failed" — see mise-tasks/services/worker for the broader
# rationale. Real crashes still propagate: if ts-node fails on its own, no
# signal reaches this script, the trap never fires, and the pipeline's
# non-zero exit status (via pipefail) is what gets reported.
# The signal handler clears the EXIT trap before exiting so cleanup doesn't
# run twice in succession.
trap cleanup_icons_server EXIT
trap 'trap - EXIT; cleanup_icons_server; exit 0' INT TERM

pnpm --dir=../skills-realm skills:setup

Expand Down
5 changes: 5 additions & 0 deletions mise-tasks/services/realm-server-base
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#MISE depends=["infra:ensure-dev-cert", "infra:ensure-pg", "infra:wait-for-prerender"]
#MISE dir="packages/realm-server"

# Treat SIGTERM/SIGINT as a clean shutdown so mise / run-p don't report
# 143-on-Ctrl-C as a task failure. See mise-tasks/services/worker for the
# rationale.
trap 'exit 0' INT TERM

if [ -z "$MATRIX_REGISTRATION_SHARED_SECRET" ]; then
MATRIX_REGISTRATION_SHARED_SECRET=$(ts-node --transpileOnly ./scripts/matrix-registration-secret.ts)
export MATRIX_REGISTRATION_SHARED_SECRET
Expand Down
10 changes: 9 additions & 1 deletion mise-tasks/services/test-realms
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ if [ -z "$BOXEL_ENVIRONMENT" ]; then
kill "$ICONS_PID" >/dev/null 2>&1 || true
fi
}
trap cleanup_icons_server EXIT INT TERM
trap cleanup_icons_server EXIT
# INT/TERM additionally exit 0 — see mise-tasks/services/worker for why.
# Clearing EXIT inside the signal handler avoids running cleanup twice.
trap 'trap - EXIT; cleanup_icons_server; exit 0' INT TERM
else
# Even without an icons server to clean up, swallow signal-induced 143s so
# the orchestrator's Ctrl-C doesn't surface as `ERROR: "start:test-realms"
# exited with 143`.
trap 'exit 0' INT TERM
fi

if [ -n "$ENV_MODE" ]; then
Expand Down
8 changes: 8 additions & 0 deletions mise-tasks/services/worker
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
# as "Illegal option", so this script must run under bash.
set -o pipefail

# Treat SIGTERM/SIGINT as a clean shutdown. The dev/dev-all orchestrators
# SIGTERM this script during Ctrl-C; without this trap the pipefail-aware
# pipe below exits 143, mise reports it as "task failed", and run-p prints
# `ERROR: "start:worker-development" exited with 143` — pure shutdown
# noise. Real crashes (ts-node faulting on its own) still propagate, since
# the trap only fires when *we* receive the signal.
trap 'exit 0' INT TERM

# Tee stdout+stderr to a per-service log file so the local Alloy scraper
# (packages/observability/alloy/config.alloy) can pick this up via
# loki.source.file. Alloy's Docker discovery doesn't see native processes.
Expand Down
5 changes: 5 additions & 0 deletions mise-tasks/services/worker-base
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#MISE depends=["infra:ensure-pg", "infra:wait-for-prerender"]
#MISE dir="packages/realm-server"

# Treat SIGTERM/SIGINT as a clean shutdown so mise / run-p don't report
# 143-on-Ctrl-C as a task failure. See mise-tasks/services/worker for the
# rationale.
trap 'exit 0' INT TERM

NODE_ENV=development \
NODE_NO_WARNINGS=1 \
NODE_OPTIONS="${NODE_OPTIONS:---max-old-space-size=4096}" \
Expand Down
5 changes: 5 additions & 0 deletions mise-tasks/services/worker-test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
#MISE depends=["infra:ensure-traefik", "infra:ensure-pg", "infra:wait-for-prerender"]
#MISE dir="packages/realm-server"

# Treat SIGTERM/SIGINT as a clean shutdown so mise / run-p don't report
# 143-on-Ctrl-C as a task failure. See mise-tasks/services/worker for the
# rationale.
trap 'exit 0' INT TERM

if [ -n "$ENV_MODE" ]; then
SERVICE_NAME_ARG="--serviceName=worker-test"
MIGRATE_ARG="--migrateDB"
Expand Down
5 changes: 3 additions & 2 deletions packages/host/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"lint:js:fix": "eslint . --report-unused-disable-directives --fix",
"lint:types": "ember-tsc --noEmit",
"ensure-boxel-ui": "../boxel-ui/addon/bin/conditional-build.sh",
"start": "pnpm ensure-boxel-ui && node scripts/vite-serve.js",
"start": "node scripts/vite-serve.js",
"serve:dist": "node scripts/serve-dist.js",
"test": "concurrently \"pnpm:lint\" \"pnpm:test:*\" --names \"lint,test:\"",
"test-with-percy": "percy exec --parallel -- pnpm test:wait-for-servers",
Expand Down Expand Up @@ -191,7 +191,8 @@
"typescript": "catalog:",
"uuid": "catalog:",
"vite": "^8.0.8",
"wait-for-localhost-cli": "catalog:"
"wait-for-localhost-cli": "catalog:",
"wtfnode": "^0.10.1"
},
"dependencies": {
"ember-modify-based-class-resource": "catalog:",
Expand Down
27 changes: 27 additions & 0 deletions packages/host/scripts/vite-serve.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,35 @@
* Wrapper around `vite` (dev server) for `pnpm start`. Delegates to the
* shared launcher, which handles BOXEL_ENVIRONMENT / Traefik registration.
* Mirrors scripts/serve-dist.js, which does the same for `vite preview`.
*
* Runs `ensure-boxel-ui` inline (synchronously, via execFileSync) so that
* the `start` script can be a single `node ...` command rather than
* `pnpm ensure-boxel-ui && node ...`. With `&&` chaining, pnpm runs the
* script through `sh -c`, which has no SIGTERM handler — so on Ctrl-C
* the shell dies via signal even though this Node process exits 0,
* leaving pnpm to report `Command failed with signal "SIGTERM"` and
* `[ERR_PNPM_RECURSIVE_RUN_FIRST_FAIL]`. Running ensure-boxel-ui
* inline keeps Node as pnpm's direct child, so pnpm sees our clean exit.
*/

const { execFileSync } = require('child_process');
const path = require('path');

// Path to the boxel-ui conditional build script, resolved from this file's
// directory so it does not depend on the caller's working directory.
const ensureBoxelUiScript = path.join(
  __dirname,
  '..',
  '..',
  'boxel-ui',
  'addon',
  'bin',
  'conditional-build.sh',
);

try {
  // Blocks until the script finishes; its output goes straight to our stdio.
  execFileSync(ensureBoxelUiScript, { stdio: 'inherit' });
} catch (err) {
  // execFileSync throws when the child exits non-zero, is killed by a signal,
  // or cannot be spawned. Left uncaught, that surfaces as a Node stack trace
  // and a generic exit code 1, hiding the script's own exit status.
  if (err.signal === 'SIGINT' || err.signal === 'SIGTERM') {
    // Interrupted during shutdown (Ctrl-C / orchestrator SIGTERM): exit
    // cleanly so the parent does not report an expected stop as a failure.
    process.exit(0);
  }
  console.error(`[vite-serve] ensure-boxel-ui failed: ${err.message}`);
  process.exit(typeof err.status === 'number' ? err.status : 1);
}

const { startWithTraefik } = require('./vite-with-traefik');

startWithTraefik({
Expand Down
Loading
Loading