From d92d169ea655412ccacc28d8ae01afef9ea5727a Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Sun, 19 Apr 2026 18:05:03 -0700 Subject: [PATCH 1/3] Kill E2E server's process group on teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spawn the server with detached: true so it leads its own process group, then signal -pgid on teardown. Previously SIGTERM went to the npx wrapper, which didn't propagate to the inner node child — leaving orphaned tsx server processes on random ports after every e2e run. Also fixes the "close timed out after 10000ms" warning vitest printed at the end of each run. Co-Authored-By: Claude Sonnet 4.6 --- test/e2e/globalSetup.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test/e2e/globalSetup.ts b/test/e2e/globalSetup.ts index 8234685..d3d7996 100644 --- a/test/e2e/globalSetup.ts +++ b/test/e2e/globalSetup.ts @@ -58,6 +58,7 @@ export async function setup() { env: { ...process.env, KNOTES_HOME: serverHome }, cwd: PROJECT_ROOT, stdio: ["ignore", "pipe", "pipe"], + detached: true, }); serverProcess.stdout?.on("data", () => {}); serverProcess.stderr?.on("data", () => {}); @@ -81,12 +82,16 @@ export async function setup() { process.env["E2E_SERVER_PORT"] = String(port); return async () => { - if (!serverProcess.killed) serverProcess.kill("SIGTERM"); + const pgid = serverProcess.pid; + const killGroup = (sig: NodeJS.Signals) => { + if (pgid == null) return; + try { process.kill(-pgid, sig); } catch {} + }; + killGroup("SIGTERM"); await new Promise((resolve) => { - const onExit = () => resolve(); - serverProcess.once("exit", onExit); + serverProcess.once("exit", () => resolve()); setTimeout(() => { - if (!serverProcess.killed) serverProcess.kill("SIGKILL"); + killGroup("SIGKILL"); setTimeout(resolve, 500); }, 2000); }); From 91b050fcd82103e6d78f44314dd1aafebb5871e6 Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Sun, 19 Apr 2026 18:25:35 -0700 Subject: [PATCH 2/3] 
Bind E2E server lifetime to vitest via watchdog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first attempt (process-group SIGTERM on teardown) handled clean shutdown but still leaked when vitest itself died hard (SIGKILL, OOM, runner yanked) — the detached child kept running, reparented to PID 1. Insert a tiny supervisor between vitest and the server. The supervisor: - Spawns the server in its own process group - Polls KNOTES_E2E_ANCHOR_PID (vitest's PID) once a second via process.kill(pid, 0); on ESRCH, tears the server down - Forwards SIGTERM/SIGINT/SIGHUP to the server group on clean teardown Anchor pid is passed via env because the intermediate wrappers between vitest and the supervisor (npx, the tsx CLI) exit after spawning it, making process.ppid unreliable. Verified manually: after SIGKILL'ing the vitest process, no orphans remain within ~5s. Co-Authored-By: Claude Sonnet 4.6 --- test/e2e/globalSetup.ts | 10 ++++-- test/e2e/server-supervisor.ts | 64 +++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 test/e2e/server-supervisor.ts diff --git a/test/e2e/globalSetup.ts b/test/e2e/globalSetup.ts index d3d7996..9d3c8df 100644 --- a/test/e2e/globalSetup.ts +++ b/test/e2e/globalSetup.ts @@ -54,8 +54,14 @@ export async function setup() { await prepareHome(serverHome, false); const port = await getFreePort(); - const serverProcess = spawn("npx", ["tsx", "src/main.ts", "server", "--port", String(port)], { - env: { ...process.env, KNOTES_HOME: serverHome }, + const tsxBin = join(PROJECT_ROOT, "node_modules/.bin/tsx"); + const supervisorPath = join(__dirname, "server-supervisor.ts"); + const serverProcess = spawn(tsxBin, [supervisorPath, "--port", String(port)], { + env: { + ...process.env, + KNOTES_HOME: serverHome, + KNOTES_E2E_ANCHOR_PID: String(process.pid), + }, cwd: PROJECT_ROOT, stdio: ["ignore", "pipe", "pipe"], detached: true, diff --git a/test/e2e/server-supervisor.ts 
b/test/e2e/server-supervisor.ts new file mode 100644 index 0000000..04e029d --- /dev/null +++ b/test/e2e/server-supervisor.ts @@ -0,0 +1,64 @@ +// Watchdog around the e2e server. Spawns the server in its own process +// group, then polls KNOTES_E2E_ANCHOR_PID once a second. If the anchor +// (the vitest process that spawned us) disappears, we tear the server +// down, even if our own SIGTERM never arrived. +// +// Anchor is passed explicitly via env because intermediate wrappers +// (npx, the tsx CLI) exit after spawning us, so process.ppid is unreliable. +import { spawn } from "node:child_process"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const PROJECT_ROOT = join(__dirname, "../.."); + +const anchorPid = Number(process.env["KNOTES_E2E_ANCHOR_PID"]); +if (!Number.isInteger(anchorPid) || anchorPid <= 1) { + console.error("supervisor: missing or invalid KNOTES_E2E_ANCHOR_PID"); + process.exit(2); +} + +const tsxBin = join(PROJECT_ROOT, "node_modules/.bin/tsx"); +const child = spawn(tsxBin, ["src/main.ts", "server", ...process.argv.slice(2)], { + cwd: PROJECT_ROOT, + stdio: "inherit", + detached: true, +}); + +const childPid = child.pid; + +function killGroup(sig: NodeJS.Signals): void { + if (childPid == null) return; + try { process.kill(-childPid, sig); } catch {} +} + +let shuttingDown = false; +function shutdown(reason: string, code = 0): void { + if (shuttingDown) return; + shuttingDown = true; + clearInterval(poll); + console.error(`supervisor: ${reason}`); + killGroup("SIGTERM"); + setTimeout(() => { + killGroup("SIGKILL"); + setTimeout(() => process.exit(code), 200); + }, 2000); +} + +const poll = setInterval(() => { + try { + process.kill(anchorPid, 0); + } catch { + shutdown(`anchor pid ${anchorPid} gone`, 1); + } +}, 1000); + +for (const sig of ["SIGTERM", "SIGINT", "SIGHUP"] as const) { + process.on(sig, () => shutdown(`received ${sig}`)); +} + 
+child.on("exit", (code, signal) => { + if (shuttingDown) return; + clearInterval(poll); + process.exit(code ?? (signal ? 1 : 0)); +}); From 5a9c63bab6b447dec44edce11958594f71a052be Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Sun, 19 Apr 2026 18:41:21 -0700 Subject: [PATCH 3/3] Replace E2E supervisor with stdin-EOF watchdog in server Drop the supervisor process and put the parent-death detection in the server itself, gated on KNOTES_E2E_WATCH_STDIN=1. The harness wires its own pipe to the server's stdin and holds the write end for the server's lifetime; the kernel closes that write end the moment the harness process dies (clean exit, SIGKILL, OOM), and the server reads EOF and exits. This is the canonical UNIX equivalent of prctl(PR_SET_PDEATHSIG): the binding is enforced by the kernel, not by a polling loop, and the leaf process detects parent death directly with no intermediary that could itself be killed and break the chain. Verified manually: SIGKILL'ing the vitest worker tears the e2e server down within ~5s, with no orphans left behind. Co-Authored-By: Claude Sonnet 4.6 --- src/cli/commands/server.ts | 19 +++++++++++ test/e2e/globalSetup.ts | 17 +++++++--- test/e2e/server-supervisor.ts | 64 ----------------------------------- 3 files changed, 32 insertions(+), 68 deletions(-) delete mode 100644 test/e2e/server-supervisor.ts diff --git a/src/cli/commands/server.ts b/src/cli/commands/server.ts index aaec43d..345f480 100644 --- a/src/cli/commands/server.ts +++ b/src/cli/commands/server.ts @@ -1,6 +1,23 @@ import type { Command } from "commander"; import { ensureHome, getConfig } from "../../core/config.ts"; +// Test-only watchdog: when KNOTES_E2E_WATCH_STDIN=1 is set, exit as soon +// as stdin closes. 
The e2e harness wires its own pipe to our stdin and +// keeps the write end open for the server's lifetime; the kernel closes +// it the moment the harness process dies (including SIGKILL/OOM), so +// reading EOF here is a parent-death signal that can't be missed. +function installStdinWatchdog(): void { + if (process.env["KNOTES_E2E_WATCH_STDIN"] !== "1") return; + const exitOnParentGone = () => { + console.error("E2E watchdog: stdin closed, parent gone, exiting"); + process.exit(0); + }; + process.stdin.on("data", () => {}); + process.stdin.on("end", exitOnParentGone); + process.stdin.on("close", exitOnParentGone); + process.stdin.resume(); +} + export function registerServerCommand(program: Command): void { program .command("server") @@ -11,6 +28,8 @@ export function registerServerCommand(program: Command): void { const config = getConfig(); const port = opts.port ? parseInt(opts.port, 10) : config.webPort; + installStdinWatchdog(); + const { createWebServer } = await import("../../web/server.ts"); const server = createWebServer(port); console.log(`Knotes server running at http://localhost:${port}`); diff --git a/test/e2e/globalSetup.ts b/test/e2e/globalSetup.ts index 9d3c8df..f72aada 100644 --- a/test/e2e/globalSetup.ts +++ b/test/e2e/globalSetup.ts @@ -55,15 +55,20 @@ export async function setup() { const port = await getFreePort(); const tsxBin = join(PROJECT_ROOT, "node_modules/.bin/tsx"); - const supervisorPath = join(__dirname, "server-supervisor.ts"); - const serverProcess = spawn(tsxBin, [supervisorPath, "--port", String(port)], { + // stdio[0] is a real pipe (not "ignore") so the server can detect our death + // via EOF on stdin — see installStdinWatchdog in src/cli/commands/server.ts. + // The kernel closes the write end the moment this process exits for any + // reason (clean teardown, SIGKILL, OOM), so the server can never miss it. 
+ // detached:true puts the server in its own process group so we can tear + // the whole subtree (tsx wrapper + server) down in one signal on teardown. + const serverProcess = spawn(tsxBin, ["src/main.ts", "server", "--port", String(port)], { env: { ...process.env, KNOTES_HOME: serverHome, - KNOTES_E2E_ANCHOR_PID: String(process.pid), + KNOTES_E2E_WATCH_STDIN: "1", }, cwd: PROJECT_ROOT, - stdio: ["ignore", "pipe", "pipe"], + stdio: ["pipe", "pipe", "pipe"], detached: true, }); serverProcess.stdout?.on("data", () => {}); @@ -93,6 +98,10 @@ export async function setup() { if (pgid == null) return; try { process.kill(-pgid, sig); } catch {} }; + // Closing stdin triggers the server's EOF watchdog (clean exit path). + // The SIGTERM/SIGKILL fallbacks below cover anything that wasn't + // listening on stdin (e.g. the tsx wrapper between us and the server). + serverProcess.stdin?.end(); killGroup("SIGTERM"); await new Promise((resolve) => { serverProcess.once("exit", () => resolve()); diff --git a/test/e2e/server-supervisor.ts b/test/e2e/server-supervisor.ts deleted file mode 100644 index 04e029d..0000000 --- a/test/e2e/server-supervisor.ts +++ /dev/null @@ -1,64 +0,0 @@ -// Watchdog around the e2e server. Spawns the server in its own process -// group, then polls KNOTES_E2E_ANCHOR_PID once a second. If the anchor -// (the vitest process that spawned us) disappears, we tear the server -// down, even if our own SIGTERM never arrived. -// -// Anchor is passed explicitly via env because intermediate wrappers -// (npx, the tsx CLI) exit after spawning us, so process.ppid is unreliable. 
-import { spawn } from "node:child_process"; -import { join, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const PROJECT_ROOT = join(__dirname, "../.."); - -const anchorPid = Number(process.env["KNOTES_E2E_ANCHOR_PID"]); -if (!Number.isInteger(anchorPid) || anchorPid <= 1) { - console.error("supervisor: missing or invalid KNOTES_E2E_ANCHOR_PID"); - process.exit(2); -} - -const tsxBin = join(PROJECT_ROOT, "node_modules/.bin/tsx"); -const child = spawn(tsxBin, ["src/main.ts", "server", ...process.argv.slice(2)], { - cwd: PROJECT_ROOT, - stdio: "inherit", - detached: true, -}); - -const childPid = child.pid; - -function killGroup(sig: NodeJS.Signals): void { - if (childPid == null) return; - try { process.kill(-childPid, sig); } catch {} -} - -let shuttingDown = false; -function shutdown(reason: string, code = 0): void { - if (shuttingDown) return; - shuttingDown = true; - clearInterval(poll); - console.error(`supervisor: ${reason}`); - killGroup("SIGTERM"); - setTimeout(() => { - killGroup("SIGKILL"); - setTimeout(() => process.exit(code), 200); - }, 2000); -} - -const poll = setInterval(() => { - try { - process.kill(anchorPid, 0); - } catch { - shutdown(`anchor pid ${anchorPid} gone`, 1); - } -}, 1000); - -for (const sig of ["SIGTERM", "SIGINT", "SIGHUP"] as const) { - process.on(sig, () => shutdown(`received ${sig}`)); -} - -child.on("exit", (code, signal) => { - if (shuttingDown) return; - clearInterval(poll); - process.exit(code ?? (signal ? 1 : 0)); -});